Diffstat (limited to 'include/linux')
556 files changed, 15516 insertions, 6288 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6adcd1b92b20..fc372bbaa547 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -330,7 +330,6 @@ static inline bool acpi_sci_irq_valid(void) } extern int sbf_port; -extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); @@ -854,6 +853,11 @@ static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) return NULL; } +static inline acpi_handle acpi_device_handle(struct acpi_device *adev) +{ + return NULL; +} + static inline bool has_acpi_companion(struct device *dev) { return false; @@ -1090,6 +1094,17 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) #endif /* !CONFIG_ACPI */ +#ifdef CONFIG_ACPI_HMAT +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, + resource_size_t *size); +#else +static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size) +{ + return -EOPNOTSUPP; +} +#endif + extern void arch_post_acpi_subsys_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC @@ -1110,13 +1125,13 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); void (*check)(void); void (*restore)(void); }; +#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); int acpi_get_lps0_constraint(struct acpi_device *adev); @@ -1125,6 +1140,13 @@ static inline int acpi_get_lps0_constraint(struct device *dev) { return ACPI_STATE_UNKNOWN; } +static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ + return -ENODEV; +} +static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ +} #endif /* CONFIG_SUSPEND && CONFIG_X86 */ void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h index c637e0997f6d..abec23c7744f 100644 --- a/include/linux/adreno-smmu-priv.h +++ b/include/linux/adreno-smmu-priv.h @@ -50,6 +50,11 @@ struct adreno_smmu_fault_info { * the GPU driver must call resume_translation() * @resume_translation: Resume translation after a fault * + * @set_prr_bit: [optional] Configure the GPU's Partially Resident + * Region (PRR) bit in the ACTLR register. + * @set_prr_addr: [optional] Configure the PRR_CFG_*ADDR register with + * the physical address of PRR page passed from GPU + * driver. * * The GPU driver (drm/msm) and adreno-smmu work together for controlling * the GPU's SMMU instance. 
This is by necessity, as the GPU is directly @@ -67,6 +72,8 @@ struct adreno_smmu_priv { void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info); void (*set_stall)(const void *cookie, bool enabled); void (*resume_translation)(const void *cookie, bool terminate); + void (*set_prr_bit)(const void *cookie, bool set); + void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr); }; #endif /* __ADRENO_SMMU_PRIV_H */ diff --git a/include/linux/aer.h b/include/linux/aer.h index 4b97f38f3fcf..02940be66324 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -16,10 +16,26 @@ #define AER_CORRECTABLE 2 #define DPC_FATAL 3 +/* + * AER and DPC capabilities TLP Logging register sizes (PCIe r6.2, sec 7.8.4 + * & 7.9.14). + */ +#define PCIE_STD_NUM_TLP_HEADERLOG 4 +#define PCIE_STD_MAX_TLP_PREFIXLOG 4 +#define PCIE_STD_MAX_TLP_HEADERLOG (PCIE_STD_NUM_TLP_HEADERLOG + 10) + struct pci_dev; struct pcie_tlp_log { - u32 dw[4]; + union { + u32 dw[PCIE_STD_MAX_TLP_HEADERLOG]; + struct { + u32 _do_not_use[PCIE_STD_NUM_TLP_HEADERLOG]; + u32 prefix[PCIE_STD_MAX_TLP_PREFIXLOG]; + }; + }; + u8 header_len; /* Length of the Logged TLP Header in DWORDs */ + bool flit; /* TLP was logged when in Flit mode */ }; struct aer_capability_regs { @@ -37,8 +53,6 @@ struct aer_capability_regs { u16 uncor_err_source; }; -int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log); - #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); diff --git a/include/linux/align.h b/include/linux/align.h index 2b4acec7b95a..55debf105a5d 100644 --- a/include/linux/align.h +++ b/include/linux/align.h @@ -2,14 +2,6 @@ #ifndef _LINUX_ALIGN_H #define _LINUX_ALIGN_H -#include <linux/const.h> - -/* @a is a power of 2 value */ -#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) -#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +#include <vdso/align.h> #endif /* _LINUX_ALIGN_H */ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d..8f7931eb7d16 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 2b90c48a6a87..062fbd4c9b77 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -31,11 +31,11 @@ struct amd_iommu_pi_data { struct task_struct; struct pci_dev; -extern int amd_iommu_detect(void); +extern void amd_iommu_detect(void); #else /* 
CONFIG_AMD_IOMMU */ -static inline int amd_iommu_detect(void) { return -ENODEV; } +static inline void amd_iommu_detect(void) { } #endif /* CONFIG_AMD_IOMMU */ diff --git a/include/linux/amd-pmf-io.h b/include/linux/amd-pmf-io.h index b4f818205216..6fa510f419c0 100644 --- a/include/linux/amd-pmf-io.h +++ b/include/linux/amd-pmf-io.h @@ -18,10 +18,12 @@ * enum sfh_message_type - Query the SFH message type * @MT_HPD: Message ID to know the Human presence info from MP2 FW * @MT_ALS: Message ID to know the Ambient light info from MP2 FW + * @MT_SRA: Message ID to know the SRA data from MP2 FW */ enum sfh_message_type { MT_HPD, MT_ALS, + MT_SRA, }; /** @@ -40,10 +42,23 @@ enum sfh_hpd_info { * struct amd_sfh_info - get HPD sensor info from MP2 FW * @ambient_light: Populates the ambient light information * @user_present: Populates the user presence information + * @platform_type: Operating modes (clamshell, flat, tent, etc.) + * @laptop_placement: Device states (ontable, onlap, outbag) */ struct amd_sfh_info { u32 ambient_light; u8 user_present; + u32 platform_type; + u32 laptop_placement; +}; + +enum laptop_placement { + LP_UNKNOWN = 0, + ON_TABLE, + ON_LAP_MOTION, + IN_BAG, + OUT_OF_BAG, + LP_UNDEFINED, }; int amd_get_sfh_info(struct amd_sfh_info *sfh_info, enum sfh_message_type op); diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 67f6fdf2e7cd..a3863da1510e 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -179,6 +179,9 @@ #define ARM_SMCCC_KVM_FUNC_PKVM_RESV_62 62 #define ARM_SMCCC_KVM_FUNC_PKVM_RESV_63 63 /* End of pKVM hypercall range */ +#define ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_VER 64 +#define ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_CPUS 65 + #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -225,6 +228,18 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_MMIO_GUARD) +#define ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_VER_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_VER) + +#define ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_CPUS) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 @@ -639,5 +654,45 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, method; \ }) +#ifdef CONFIG_ARM64 + +#define __fail_smccc_1_2(___res) \ + do { \ + if (___res) \ + ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ + } while (0) + +/* + * arm_smccc_1_2_invoke() - make an SMCCC v1.2 compliant call + * + * @args: SMC args are in the a0..a17 fields of the arm_smcc_1_2_regs structure + * @res: result values from registers 0 to 17 + * + * This macro will make either an HVC call or an SMC call depending on the + * current SMCCC conduit. If no valid conduit is available then -1 + * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied). + * + * The return value also provides the conduit that was used. 
+ */ +#define arm_smccc_1_2_invoke(args, res) ({ \ + struct arm_smccc_1_2_regs *__args = args; \ + struct arm_smccc_1_2_regs *__res = res; \ + int method = arm_smccc_1_1_get_conduit(); \ + switch (method) { \ + case SMCCC_CONDUIT_HVC: \ + arm_smccc_1_2_hvc(__args, __res); \ + break; \ + case SMCCC_CONDUIT_SMC: \ + arm_smccc_1_2_smc(__args, __res); \ + break; \ + default: \ + __fail_smccc_1_2(__res); \ + method = SMCCC_CONDUIT_NONE; \ + break; \ + } \ + method; \ + }) +#endif /*CONFIG_ARM64*/ + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 74169dd0f659..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -112,6 +112,7 @@ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) #define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) +#define FFA_VERSION_1_2 FFA_PACK_VERSION_INFO(1, 2) /** * FF-A specification mentions explicitly about '4K pages'. This should @@ -176,6 +177,7 @@ void ffa_device_unregister(struct ffa_device *ffa_dev); int ffa_driver_register(struct ffa_driver *driver, struct module *owner, const char *mod_name); void ffa_driver_unregister(struct ffa_driver *driver); +void ffa_devices_unregister(void); bool ffa_device_is_valid(struct ffa_device *ffa_dev); #else @@ -188,6 +190,8 @@ ffa_device_register(const struct ffa_partition_info *part_info, static inline void ffa_device_unregister(struct ffa_device *dev) {} +static inline void ffa_devices_unregister(void) {} + static inline int ffa_driver_register(struct ffa_driver *driver, struct module *owner, const char *mod_name) @@ -237,8 +241,12 @@ struct ffa_partition_info { #define FFA_PARTITION_NOTIFICATION_RECV BIT(3) /* partition runs in the AArch64 execution state. */ #define FFA_PARTITION_AARCH64_EXEC BIT(8) +/* partition supports receipt of direct request2 */ +#define FFA_PARTITION_DIRECT_REQ2_RECV BIT(9) +/* partition can send direct request2. 
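As an illustration of the new arm_smccc_1_2_invoke() macro above (only built for CONFIG_ARM64), here is a minimal sketch of a caller that issues one of the new vendor-hyp discovery calls and checks both the conduit and the SMCCC result. The structure, macro and function-ID names come from this header; the surrounding function and its error handling are illustrative assumptions.

#include <linux/arm-smccc.h>

static int query_impl_version(void)
{
	struct arm_smccc_1_2_regs args = {
		.a0 = ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_VER_FUNC_ID,
	};
	struct arm_smccc_1_2_regs res;

	/* Picks HVC or SMC based on arm_smccc_1_1_get_conduit() */
	if (arm_smccc_1_2_invoke(&args, &res) == SMCCC_CONDUIT_NONE)
		return -EOPNOTSUPP;

	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
		return -EOPNOTSUPP;

	/* res.a0..a17 now hold the implementation-defined return values */
	return 0;
}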
*/ +#define FFA_PARTITION_DIRECT_REQ2_SEND BIT(10) u32 properties; - u32 uuid[4]; + uuid_t uuid; }; static inline @@ -256,6 +264,10 @@ bool ffa_partition_check_property(struct ffa_device *dev, u32 property) #define ffa_partition_supports_direct_recv(dev) \ ffa_partition_check_property(dev, FFA_PARTITION_DIRECT_RECV) +#define ffa_partition_supports_direct_req2_recv(dev) \ + (ffa_partition_check_property(dev, FFA_PARTITION_DIRECT_REQ2_RECV) && \ + !dev->mode_32bit) + /* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP} which pass data via registers */ struct ffa_send_direct_data { unsigned long data0; /* w3/x3 */ @@ -271,6 +283,8 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; + uuid_t uuid; }; /* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which pass data via registers */ @@ -439,7 +453,7 @@ struct ffa_msg_ops { int (*sync_send_receive)(struct ffa_device *dev, struct ffa_send_direct_data *data); int (*indirect_send)(struct ffa_device *dev, void *buf, size_t sz); - int (*sync_send_receive2)(struct ffa_device *dev, const uuid_t *uuid, + int (*sync_send_receive2)(struct ffa_device *dev, struct ffa_send_direct_data2 *data); }; @@ -455,6 +469,7 @@ struct ffa_cpu_ops { typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); +typedef void (*ffa_fwk_notifier_cb)(int notify_id, void *cb_data, void *buf); struct ffa_notifier_ops { int (*sched_recv_cb_register)(struct ffa_device *dev, @@ -463,6 +478,10 @@ struct ffa_notifier_ops { int (*notify_request)(struct ffa_device *dev, bool per_vcpu, ffa_notifier_cb cb, void *cb_data, int notify_id); int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*fwk_notify_request)(struct ffa_device *dev, + ffa_fwk_notifier_cb cb, void *cb_data, + int notify_id); + int (*fwk_notify_relinquish)(struct ffa_device *dev, int notify_id); int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, u16 vcpu); }; diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 255701e1251b..f652a5028b59 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,12 +46,12 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. 
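The FFA_PARTITION_DIRECT_REQ2_* properties and the ffa_partition_supports_direct_req2_recv() helper above let a client check whether a partition accepts direct request2 messages before using the register-based interface. A rough sketch of such a check in a driver, assuming an already-probed ffa_dev and using only names visible in this header; the zeroed payload is a placeholder.

#include <linux/arm_ffa.h>

static int send_req2(struct ffa_device *ffa_dev)
{
	struct ffa_send_direct_data2 data = { 0 };

	if (!ffa_partition_supports_direct_req2_recv(ffa_dev))
		return -EOPNOTSUPP;

	/* sync_send_receive2() no longer takes a separate UUID argument */
	return ffa_dev->ops->msg_ops->sync_send_receive2(ffa_dev, &data);
}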
*/ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); -void __init sdei_init(void); +void __init acpi_sdei_init(void); void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } -static inline void sdei_init(void) { } +static inline void acpi_sdei_init(void) { } static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5cc73d7e5b52..1ca9f9e05f4f 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -168,11 +168,6 @@ async_xor_offs(struct page *dest, unsigned int offset, int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum sum_check_flags *result, - struct async_submit_ctl *submit); - -struct dma_async_tx_descriptor * async_xor_val_offs(struct page *dest, unsigned int offset, struct page **src_list, unsigned int *src_offset, int src_cnt, size_t len, enum sum_check_flags *result, diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 13a11f3c09b8..cf0afa60e4a7 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -154,7 +154,10 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, - /* opcode 57 - 65 are reserved */ + /* opcode 58 and 59 are reserved */ + VIRTCHNL_OP_1588_PTP_GET_CAPS = 60, + VIRTCHNL_OP_1588_PTP_GET_TIME = 61, + /* opcode 62 - 65 are reserved */ VIRTCHNL_OP_GET_QOS_CAPS = 66, /* opcode 68 through 111 are reserved */ VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, @@ -270,6 +273,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) #define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) +#define VIRTCHNL_VF_CAP_PTP BIT(31) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -309,6 +313,60 @@ struct virtchnl_txq_info { VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); +/* RX descriptor IDs (range from 0 to 63) */ +enum virtchnl_rx_desc_ids { + VIRTCHNL_RXDID_0_16B_BASE = 0, + VIRTCHNL_RXDID_1_32B_BASE = 1, + VIRTCHNL_RXDID_2_FLEX_SQ_NIC = 2, + VIRTCHNL_RXDID_3_FLEX_SQ_SW = 3, + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB = 4, + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL = 5, + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2 = 6, + VIRTCHNL_RXDID_7_HW_RSVD = 7, + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC = 16, + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN = 17, + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4 = 18, + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6 = 19, + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW = 20, + VIRTCHNL_RXDID_21_COMMS_AUX_TCP = 21, + /* 22 through 63 are reserved */ +}; + +#define 
VIRTCHNL_RXDID_BIT(x) BIT_ULL(VIRTCHNL_RXDID_##x) + +/* RX descriptor ID bitmasks */ +enum virtchnl_rx_desc_id_bitmasks { + VIRTCHNL_RXDID_0_16B_BASE_M = VIRTCHNL_RXDID_BIT(0_16B_BASE), + VIRTCHNL_RXDID_1_32B_BASE_M = VIRTCHNL_RXDID_BIT(1_32B_BASE), + VIRTCHNL_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL_RXDID_BIT(2_FLEX_SQ_NIC), + VIRTCHNL_RXDID_3_FLEX_SQ_SW_M = VIRTCHNL_RXDID_BIT(3_FLEX_SQ_SW), + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB_M = VIRTCHNL_RXDID_BIT(4_FLEX_SQ_NIC_VEB), + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL_M = VIRTCHNL_RXDID_BIT(5_FLEX_SQ_NIC_ACL), + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2_M = VIRTCHNL_RXDID_BIT(6_FLEX_SQ_NIC_2), + VIRTCHNL_RXDID_7_HW_RSVD_M = VIRTCHNL_RXDID_BIT(7_HW_RSVD), + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC_M = VIRTCHNL_RXDID_BIT(16_COMMS_GENERIC), + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN_M = VIRTCHNL_RXDID_BIT(17_COMMS_AUX_VLAN), + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4_M = VIRTCHNL_RXDID_BIT(18_COMMS_AUX_IPV4), + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6_M = VIRTCHNL_RXDID_BIT(19_COMMS_AUX_IPV6), + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW_M = VIRTCHNL_RXDID_BIT(20_COMMS_AUX_FLOW), + VIRTCHNL_RXDID_21_COMMS_AUX_TCP_M = VIRTCHNL_RXDID_BIT(21_COMMS_AUX_TCP), + /* 22 through 63 are reserved */ +}; + +/* virtchnl_rxq_info_flags - definition of bits in the flags field of the + * virtchnl_rxq_info structure. + * + * @VIRTCHNL_PTP_RX_TSTAMP: request to enable Rx timestamping + * + * Other flag bits are currently reserved and they may be extended in the + * future. + */ +enum virtchnl_rxq_info_flags { + VIRTCHNL_PTP_RX_TSTAMP = BIT(0), +}; + /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. @@ -331,8 +389,14 @@ struct virtchnl_rxq_info { u32 databuffer_size; u32 max_pkt_size; u8 crc_disable; - u8 rxdid; - u8 pad1[2]; + /* see enum virtchnl_rx_desc_ids; + * only used when VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is supported. Note + * that when the offload is not supported, the descriptor format aligns + * with VIRTCHNL_RXDID_1_32B_BASE. + */ + enum virtchnl_rx_desc_ids rxdid:8; + enum virtchnl_rxq_info_flags flags:8; /* see virtchnl_rxq_info_flags */ + u8 pad1; u64 dma_ring_addr; /* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */ @@ -1032,10 +1096,6 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); -struct virtchnl_supported_rxdids { - u64 supported_rxdids; -}; - /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -1283,7 +1343,7 @@ struct virtchnl_proto_hdrs { * 2 - from the second inner layer * .... **/ - int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + u32 count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ union { struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; @@ -1335,7 +1395,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); struct virtchnl_filter_action_set { /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ - int count; + u32 count; struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; }; @@ -1425,6 +1485,61 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +#define VIRTCHNL_1588_PTP_CAP_RX_TSTAMP BIT(1) +#define VIRTCHNL_1588_PTP_CAP_READ_PHC BIT(2) + +/** + * struct virtchnl_ptp_caps - Defines the PTP caps available to the VF. + * @caps: On send, VF sets what capabilities it requests. 
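The reworked virtchnl_rxq_info above now carries the descriptor format (rxdid) and the new flags field explicitly. A hedged sketch of how a VF driver might fill them in once it has negotiated flexible descriptors and Rx timestamping; the surrounding queue-setup code is assumed.

#include <linux/avf/virtchnl.h>

static void fill_rxq_info(struct virtchnl_rxq_info *rxqi, bool ptp_rx)
{
	/* Default to the legacy 32-byte base descriptor format */
	rxqi->rxdid = VIRTCHNL_RXDID_1_32B_BASE;

	/* Rx timestamps require a flexible descriptor format */
	if (ptp_rx) {
		rxqi->rxdid = VIRTCHNL_RXDID_2_FLEX_SQ_NIC;
		rxqi->flags |= VIRTCHNL_PTP_RX_TSTAMP;
	}
}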
On reply, PF + * indicates what has been enabled for this VF. The PF shall not set + * bits which were not requested by the VF. + * @rsvd: Reserved bits for future extension. + * + * Structure that defines the PTP capabilities available to the VF. The VF + * sends VIRTCHNL_OP_1588_PTP_GET_CAPS, and must fill in the ptp_caps field + * indicating what capabilities it is requesting. The PF will respond with the + * same message with the virtchnl_ptp_caps structure indicating what is + * enabled for the VF. + * + * VIRTCHNL_1588_PTP_CAP_RX_TSTAMP indicates that the VF receive queues have + * receive timestamps enabled in the flexible descriptors. Note that this + * requires a VF to also negotiate to enable advanced flexible descriptors in + * the receive path instead of the default legacy descriptor format. + * + * VIRTCHNL_1588_PTP_CAP_READ_PHC indicates that the VF may read the PHC time + * via the VIRTCHNL_OP_1588_PTP_GET_TIME command. + * + * Note that in the future, additional capability flags may be added which + * indicate additional extended support. All fields marked as reserved by this + * header will be set to zero. VF implementations should verify this to ensure + * that future extensions do not break compatibility. + */ +struct virtchnl_ptp_caps { + u32 caps; + u8 rsvd[44]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_ptp_caps); + +/** + * struct virtchnl_phc_time - Contains the 64bits of PHC clock time in ns. + * @time: PHC time in nanoseconds + * @rsvd: Reserved for future extension + * + * Structure received with VIRTCHNL_OP_1588_PTP_GET_TIME. Contains the 64bits + * of PHC clock time in nanoseconds. + * + * VIRTCHNL_OP_1588_PTP_GET_TIME may be sent to request the current time of + * the PHC. This op is available in case direct access via the PHC registers + * is not available. + */ +struct virtchnl_phc_time { + u64 time; + u8 rsvd[8]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_phc_time); + struct virtchnl_shaper_bw { /* Unit is Kbps */ u32 committed; @@ -1757,6 +1872,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, } } break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + valid_len = sizeof(struct virtchnl_ptp_caps); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + valid_len = sizeof(struct virtchnl_phc_time); + break; /* These are always errors coming from the VF. 
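The virtchnl_ptp_caps exchange described above is a request/response negotiation: the VF fills in the capabilities it wants, and the PF replies with what it actually enabled. A minimal sketch of the VF side building the request; vf_send_msg() is a hypothetical stand-in for the driver's mailbox transport.

#include <linux/avf/virtchnl.h>

/* Hypothetical mailbox helper, assumed to exist in the VF driver */
int vf_send_msg(enum virtchnl_ops op, void *msg, u16 len);

static int request_ptp_caps(void)
{
	struct virtchnl_ptp_caps caps = {
		/* Ask for Rx timestamps and direct PHC time reads */
		.caps = VIRTCHNL_1588_PTP_CAP_RX_TSTAMP |
			VIRTCHNL_1588_PTP_CAP_READ_PHC,
	};

	return vf_send_msg(VIRTCHNL_OP_1588_PTP_GET_CAPS, &caps, sizeof(caps));
}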
*/ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e7af9a03b41..e721148c95d0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -249,6 +249,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && + (inode->i_sb->s_iflags & SB_I_CGROUPWB) && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 670f2dae692f..996493917f36 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -48,11 +48,11 @@ struct badblocks_context { int ack; }; -int badblocks_check(struct badblocks *bb, sector_t s, int sectors, - sector_t *first_bad, int *bad_sectors); -int badblocks_set(struct badblocks *bb, sector_t s, int sectors, - int acknowledged); -int badblocks_clear(struct badblocks *bb, sector_t s, int sectors); +int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors, + sector_t *first_bad, sector_t *bad_sectors); +bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors, + int acknowledged); +bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors); void ack_all_badblocks(struct badblocks *bb); ssize_t badblocks_show(struct badblocks *bb, char *page, int unack); ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 3305c849abd6..1625c8529e70 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,12 @@ struct linux_binprm { */ point_of_no_return:1, /* Set when "comm" must come from the dentry. */ - comm_from_dentry:1; + comm_from_dentry:1, + /* + * Set by user space to check executability according to the + * caller's environment. + */ + is_check:1; struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; @@ -59,7 +64,7 @@ struct linux_binprm { const char *fdpath; /* generated filename for execveat */ unsigned interp_flags; int execfd; /* File descriptor of the executable */ - unsigned long loader, exec; + unsigned long exec; struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. 
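The badblocks changes above switch badblocks_set()/badblocks_clear() to a bool return and widen the sector counts to sector_t. A short sketch of a caller adapted to the new signatures; the wrapper functions and error values are assumptions for illustration.

#include <linux/badblocks.h>

static int record_bad_range(struct badblocks *bb, sector_t start,
			    sector_t nr_sectors)
{
	/* badblocks_set() now reports success/failure as a bool */
	if (!badblocks_set(bb, start, nr_sectors, 1 /* acknowledged */))
		return -ENOSPC;
	return 0;
}

static int range_has_badblocks(struct badblocks *bb, sector_t start,
			       sector_t nr_sectors)
{
	sector_t first_bad, bad_sectors;

	/*
	 * Non-zero means the range overlaps known bad blocks (negative if
	 * some of them are not yet acknowledged).
	 */
	return badblocks_check(bb, start, nr_sectors,
			       &first_bad, &bad_sectors);
}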
*/ diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index dbf0f74c1529..0a25716820fe 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -7,29 +7,28 @@ enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ - BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ - BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ - BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ - BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */ + BIP_DISK_NOCHECK = 1 << 2, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 3, /* IP checksum */ + BIP_COPY_USER = 1 << 4, /* Kernel bounce buffer in use */ + BIP_CHECK_GUARD = 1 << 5, /* guard check */ + BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ + BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ }; struct bio_integrity_payload { - struct bio *bip_bio; /* parent bio */ - struct bvec_iter bip_iter; unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ - - struct bvec_iter bio_iter; /* for rewinding parent bio */ - - struct work_struct bip_work; /* I/O completion */ + u16 app_tag; /* application tag value */ struct bio_vec *bip_vec; - struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; +#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \ + BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) + #ifdef CONFIG_BLK_DEV_INTEGRITY #define bip_for_each_vec(bvl, bip, iter) \ @@ -68,19 +67,19 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip, bip->bip_iter.bi_sector = seed; } +void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip, + struct bio_vec *bvecs, unsigned int nr_vecs); struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len); +int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter); +int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta); void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); void bio_integrity_trim(struct bio *bio); int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask); -int bioset_integrity_create(struct bio_set *bs, int pool_size); -void bioset_integrity_free(struct bio_set *bs); -void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ @@ -89,17 +88,12 @@ static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) return NULL; } -static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) -{ - return 0; -} - -static inline void bioset_integrity_free(struct bio_set *bs) +static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) { + return -EINVAL; } -static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, - ssize_t len) +static inline int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta) { return -EINVAL; } @@ -128,10 +122,6 @@ static inline void bio_integrity_trim(struct bio *bio) { } -static inline void bio_integrity_init(void) -{ -} - static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { return false; diff 
--git a/include/linux/bio.h b/include/linux/bio.h index 7a1b3b1a8fed..b474a47ec7ee 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include <linux/uio.h> #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; @@ -19,9 +20,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs) return min(nr_segs, BIO_MAX_VECS); } -#define bio_prio(bio) (bio)->bi_ioprio -#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) - #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) @@ -293,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); @@ -416,8 +414,6 @@ int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, unsigned off); bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); -extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, - unsigned int, unsigned int); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, @@ -630,10 +626,6 @@ struct bio_set { mempool_t bio_pool; mempool_t bvec_pool; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t bio_integrity_pool; - mempool_t bvec_integrity_pool; -#endif unsigned int back_pad; /* diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index bbc4730a6505..c0989b5b0407 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -13,7 +13,7 @@ * Don't use this unless you really need to: spin_lock() and spin_unlock() * are significantly faster. */ -static inline void bit_spin_lock(int bitnum, unsigned long *addr) +static __always_inline void bit_spin_lock(int bitnum, unsigned long *addr) { /* * Assuming the lock is uncontended, this never enters @@ -38,7 +38,7 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr) /* * Return true if it was acquired */ -static inline int bit_spin_trylock(int bitnum, unsigned long *addr) +static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr) { preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -54,7 +54,7 @@ static inline int bit_spin_trylock(int bitnum, unsigned long *addr) /* * bit-based spin_unlock() */ -static inline void bit_spin_unlock(int bitnum, unsigned long *addr) +static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); @@ -71,7 +71,7 @@ static inline void bit_spin_unlock(int bitnum, unsigned long *addr) * non-atomic version, which can be used eg. if the bit lock itself is * protecting the rest of the flags in the word. 
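Marking the bit spinlock helpers __always_inline above does not change how they are used; the comment on __bit_spin_unlock() describes the case where the lock bit shares a word with the flags it protects. A small sketch of that pattern, with the bit layout as an assumption.

#include <linux/bit_spinlock.h>
#include <linux/bitops.h>

#define OBJ_LOCK_BIT	0	/* lock bit, lives in obj->flags */
#define OBJ_DIRTY_BIT	1	/* example flag protected by the lock */

static void mark_dirty(unsigned long *flags)
{
	bit_spin_lock(OBJ_LOCK_BIT, flags);
	__set_bit(OBJ_DIRTY_BIT, flags);
	/*
	 * The non-atomic unlock is sufficient here because the only other
	 * writers of this word take the same bit lock first.
	 */
	__bit_spin_unlock(OBJ_LOCK_BIT, flags);
}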
*/ -static inline void __bit_spin_unlock(int bitnum, unsigned long *addr) +static __always_inline void __bit_spin_unlock(int bitnum, unsigned long *addr) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 262b6596eca5..595217b7a6e7 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -23,7 +23,7 @@ struct device; * * Function implementations generic to all architectures are in * lib/bitmap.c. Functions implementations that are architecture - * specific are in various include/asm-<arch>/bitops.h headers + * specific are in various arch/<arch>/include/asm/bitops.h headers * and other arch/<arch> specific files. * * See lib/bitmap.c for more details. @@ -560,9 +560,9 @@ void bitmap_replace(unsigned long *dst, * ...0..11...0..10 * dst: 0000001100000010 * - * A relationship exists between bitmap_scatter() and bitmap_gather(). + * A relationship exists between bitmap_scatter() and bitmap_gather(). See + * bitmap_gather() for the bitmap gather detailed operations. TL;DR: * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. - * See bitmap_scatter() for details related to this relationship. */ static __always_inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, @@ -608,7 +608,9 @@ void bitmap_scatter(unsigned long *dst, const unsigned long *src, * dst: 0000000000011010 * * A relationship exists between bitmap_gather() and bitmap_scatter(). See - * bitmap_scatter() for the bitmap scatter detailed operations. + * bitmap_scatter() for the bitmap scatter detailed operations. TL;DR: + * bitmap_scatter() can be seen as the 'reverse' bitmap_gather() operation. + * * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n). * The operation bitmap_gather(result, scattered, mask, n) leads to a result * equal or equivalent to src. diff --git a/include/linux/bitops.h b/include/linux/bitops.h index ba35bbf07798..c1cb53cf2f0f 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -230,6 +230,37 @@ static inline int get_count_order_long(unsigned long l) } /** + * parity8 - get the parity of an u8 value + * @value: the value to be examined + * + * Determine the parity of the u8 argument. + * + * Returns: + * 0 for even parity, 1 for odd parity + * + * Note: This function informs you about the current parity. Example to bail + * out when parity is odd: + * + * if (parity8(val) == 1) + * return -EBADMSG; + * + * If you need to calculate a parity bit, you need to draw the conclusion from + * this result yourself. 
Example to enforce odd parity, parity bit is bit 7: + * + * if (parity8(val) == 0) + * val ^= BIT(7); + */ +static inline int parity8(u8 val) +{ + /* + * One explanation of this algorithm: + * https://funloop.org/codex/problem/parity/README.html + */ + val ^= val >> 4; + return (0x6996 >> (val & 0xf)) & 1; +} + +/** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * diff --git a/include/linux/bits.h b/include/linux/bits.h index 60044b608817..14fd0ca9a6cd 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -20,9 +20,8 @@ */ #if !defined(__ASSEMBLY__) #include <linux/build_bug.h> -#define GENMASK_INPUT_CHECK(h, l) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((l) > (h)), (l) > (h), 0))) +#include <linux/compiler.h> +#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, @@ -41,7 +40,7 @@ * Missing asm support * * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __init128' data + * in the asm code, as it shifts an 'unsigned __int128' data * type instead of direct representation of 128 bit constants * such as long and unsigned long. The fundamental problem is * that a 128 bit constant will get silently truncated by the diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index 90ab33cb5d0e..4f39e9cd7576 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -57,6 +57,62 @@ struct blk_crypto_ll_ops { int (*keyslot_evict)(struct blk_crypto_profile *profile, const struct blk_crypto_key *key, unsigned int slot); + + /** + * @derive_sw_secret: Derive the software secret from a hardware-wrapped + * key in ephemerally-wrapped form. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * Must return 0 on success, -EBADMSG if the key is invalid, or another + * -errno code on other errors. + */ + int (*derive_sw_secret)(struct blk_crypto_profile *profile, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); + + /** + * @import_key: Create a hardware-wrapped key by importing a raw key. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * On success, must write the new key in long-term wrapped form to + * @lt_key and return its size in bytes. On failure, must return a + * -errno value. + */ + int (*import_key)(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + + /** + * @generate_key: Generate a hardware-wrapped key. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * On success, must write the new key in long-term wrapped form to + * @lt_key and return its size in bytes. On failure, must return a + * -errno value. + */ + int (*generate_key)(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + + /** + * @prepare_key: Prepare a hardware-wrapped key to be used. + * + * Prepare a hardware-wrapped key to be used by converting it from + * long-term wrapped form to ephemerally-wrapped form. This only needs + * to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED is supported. + * + * On success, must write the key in ephemerally-wrapped form to + * @eph_key and return its size in bytes. 
On failure, must return + * -EBADMSG if the key is invalid, or another -errno on other error. + */ + int (*prepare_key)(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); }; /** @@ -85,6 +141,12 @@ struct blk_crypto_profile { unsigned int max_dun_bytes_supported; /** + * @key_types_supported: A bitmask of the supported key types: + * BLK_CRYPTO_KEY_TYPE_RAW and/or BLK_CRYPTO_KEY_TYPE_HW_WRAPPED. + */ + unsigned int key_types_supported; + + /** * @modes_supported: Array of bitmasks that specifies whether each * combination of crypto mode and data unit size is supported. * Specifically, the i'th bit of modes_supported[crypto_mode] is set if @@ -143,6 +205,17 @@ void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); +int blk_crypto_import_key(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + +int blk_crypto_generate_key(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + +int blk_crypto_prepare_key(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, const struct blk_crypto_profile *child); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 5e5822c18ee4..58b0c5254a67 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -6,7 +6,9 @@ #ifndef __LINUX_BLK_CRYPTO_H #define __LINUX_BLK_CRYPTO_H +#include <linux/minmax.h> #include <linux/types.h> +#include <uapi/linux/blk-crypto.h> enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_INVALID, @@ -17,7 +19,55 @@ enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_MAX, }; -#define BLK_CRYPTO_MAX_KEY_SIZE 64 +/* + * Supported types of keys. Must be bitflags due to their use in + * blk_crypto_profile::key_types_supported. + */ +enum blk_crypto_key_type { + /* + * Raw keys (i.e. "software keys"). These keys are simply kept in raw, + * plaintext form in kernel memory. + */ + BLK_CRYPTO_KEY_TYPE_RAW = 0x1, + + /* + * Hardware-wrapped keys. These keys are only present in kernel memory + * in ephemerally-wrapped form, and they can only be unwrapped by + * dedicated hardware. For details, see the "Hardware-wrapped keys" + * section of Documentation/block/inline-encryption.rst. + */ + BLK_CRYPTO_KEY_TYPE_HW_WRAPPED = 0x2, +}; + +/* + * Currently the maximum raw key size is 64 bytes, as that is the key size of + * BLK_ENCRYPTION_MODE_AES_256_XTS which takes the longest key. + * + * The maximum hardware-wrapped key size depends on the hardware's key wrapping + * algorithm, which is a hardware implementation detail, so it isn't precisely + * specified. But currently 128 bytes is plenty in practice. Implementations + * are recommended to wrap a 32-byte key for the hardware KDF with AES-256-GCM, + * which should result in a size closer to 64 bytes than 128. + * + * Both of these values can trivially be increased if ever needed. + */ +#define BLK_CRYPTO_MAX_RAW_KEY_SIZE 64 +#define BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE 128 + +#define BLK_CRYPTO_MAX_ANY_KEY_SIZE \ + MAX(BLK_CRYPTO_MAX_RAW_KEY_SIZE, BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE) + +/* + * Size of the "software secret" which can be derived from a hardware-wrapped + * key. This is currently always 32 bytes. 
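The new blk_crypto_ll_ops hooks and the key_types_supported mask above are how a driver advertises hardware-wrapped key support alongside raw keys. A hedged sketch of the registration side, assuming driver-provided hook implementations with the signatures documented above.

#include <linux/blk-crypto-profile.h>

/* Driver implementations, assumed to exist elsewhere in the driver */
int my_derive_sw_secret(struct blk_crypto_profile *profile,
			const u8 *eph_key, size_t eph_key_size,
			u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]);
int my_prepare_key(struct blk_crypto_profile *profile,
		   const u8 *lt_key, size_t lt_key_size,
		   u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]);

static void my_setup_crypto_profile(struct blk_crypto_profile *profile)
{
	profile->ll_ops.derive_sw_secret = my_derive_sw_secret;
	profile->ll_ops.prepare_key = my_prepare_key;

	/* Accept both raw and hardware-wrapped keys */
	profile->key_types_supported = BLK_CRYPTO_KEY_TYPE_RAW |
				       BLK_CRYPTO_KEY_TYPE_HW_WRAPPED;
}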
Note, the choice of 32 bytes + * assumes that the software secret is only used directly for algorithms that + * don't require more than a 256-bit key to get the desired security strength. + * If it were to be used e.g. directly as an AES-256-XTS key, then this would + * need to be increased (which is possible if hardware supports it, but care + * would need to be taken to avoid breaking users who need exactly 32 bytes). + */ +#define BLK_CRYPTO_SW_SECRET_SIZE 32 + /** * struct blk_crypto_config - an inline encryption key's crypto configuration * @crypto_mode: encryption algorithm this key is for @@ -26,20 +76,23 @@ enum blk_crypto_mode_num { * ciphertext. This is always a power of 2. It might be e.g. the * filesystem block size or the disk sector size. * @dun_bytes: the maximum number of bytes of DUN used when using this key + * @key_type: the type of this key -- either raw or hardware-wrapped */ struct blk_crypto_config { enum blk_crypto_mode_num crypto_mode; unsigned int data_unit_size; unsigned int dun_bytes; + enum blk_crypto_key_type key_type; }; /** * struct blk_crypto_key - an inline encryption key - * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this - * key + * @crypto_cfg: the crypto mode, data unit size, key type, and other + * characteristics of this key and how it will be used * @data_unit_size_bits: log2 of data_unit_size - * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode) - * @raw: the raw bytes of this key. Only the first @size bytes are used. + * @size: size of this key in bytes. The size of a raw key is fixed for a given + * crypto mode, but the size of a hardware-wrapped key can vary. + * @bytes: the bytes of this key. Only the first @size bytes are significant. * * A blk_crypto_key is immutable once created, and many bios can reference it at * the same time. 
It must not be freed until all bios using it have completed @@ -49,7 +102,7 @@ struct blk_crypto_key { struct blk_crypto_config crypto_cfg; unsigned int data_unit_size_bits; unsigned int size; - u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; + u8 bytes[BLK_CRYPTO_MAX_ANY_KEY_SIZE]; }; #define BLK_CRYPTO_MAX_IV_SIZE 32 @@ -87,7 +140,9 @@ bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, unsigned int bytes, const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]); -int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *key_bytes, size_t key_size, + enum blk_crypto_key_type key_type, enum blk_crypto_mode_num crypto_mode, unsigned int dun_bytes, unsigned int data_unit_size); @@ -103,6 +158,10 @@ bool blk_crypto_config_supported_natively(struct block_device *bdev, bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); +int blk_crypto_derive_sw_secret(struct block_device *bdev, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool bio_has_crypt_ctx(struct bio *bio) diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h deleted file mode 100644 index ca544e1d3508..000000000000 --- a/include/linux/blk-mq-pci.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_PCI_H -#define _LINUX_BLK_MQ_PCI_H - -struct blk_mq_queue_map; -struct pci_dev; - -void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset); - -#endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h deleted file mode 100644 index 13226e9b22dd..000000000000 --- a/include/linux/blk-mq-virtio.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_VIRTIO_H -#define _LINUX_BLK_MQ_VIRTIO_H - -struct blk_mq_queue_map; -struct virtio_device; - -void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, - struct virtio_device *vdev, int first_vec); - -#endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7b19b83349cf..8eb9b3310167 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -28,7 +28,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ -enum { +enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */ @@ -296,13 +296,6 @@ enum blk_eh_timer_return { BLK_EH_RESET_TIMER, }; -/* Keep alloc_policy_name[] in sync with the definitions below */ -enum { - BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */ - BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */ - BLK_TAG_ALLOC_MAX -}; - /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device @@ -668,7 +661,6 @@ struct blk_mq_ops { /* Keep hctx_flag_name[] in sync with the definitions below */ enum { - BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for @@ -677,23 +669,20 @@ enum { BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 4, - /* Do not allow an I/O scheduler to be configured. */ - BLK_MQ_F_NO_SCHED = 1 << 5, + + /* + * Alloc tags on a round-robin base instead of the first available one. 
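blk_crypto_init_key() above gains explicit key-size and key-type parameters so it can take either raw or hardware-wrapped key material. A sketch of an upper-layer caller initializing a raw AES-256-XTS key; the 8-byte DUN and 4096-byte data-unit size are illustrative assumptions.

#include <linux/blk-crypto.h>

static int init_raw_xts_key(struct blk_crypto_key *key,
			    const u8 raw_key[BLK_CRYPTO_MAX_RAW_KEY_SIZE])
{
	/* 64-byte raw key, raw (not hardware-wrapped) key type */
	return blk_crypto_init_key(key, raw_key, 64,
				   BLK_CRYPTO_KEY_TYPE_RAW,
				   BLK_ENCRYPTION_MODE_AES_256_XTS,
				   8, 4096);
}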
+ */ + BLK_MQ_F_TAG_RR = 1 << 5, /* * Select 'none' during queue registration in case of a single hwq * or shared hwqs instead of 'mq-deadline'. */ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, - BLK_MQ_F_ALLOC_POLICY_START_BIT = 7, - BLK_MQ_F_ALLOC_POLICY_BITS = 1, + + BLK_MQ_F_MAX = 1 << 7, }; -#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ - ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ - ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) -#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ - ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ - << BLK_MQ_F_ALLOC_POLICY_START_BIT) #define BLK_MQ_MAX_DEPTH (10240) #define BLK_MQ_NO_HCTX_IDX (-1U) @@ -863,12 +852,20 @@ static inline bool blk_mq_is_reserved_rq(struct request *rq) return rq->rq_flags & RQF_RESV; } -/* +/** + * blk_mq_add_to_batch() - add a request to the completion batch + * @req: The request to add to batch + * @iob: The batch to add the request + * @is_error: Specify true if the request failed with an error + * @complete: The completaion handler for the request + * * Batched completions only work when there is no I/O error and no special * ->end_io handler. + * + * Return: true when the request was added to the batch, otherwise false */ static inline bool blk_mq_add_to_batch(struct request *req, - struct io_comp_batch *iob, int ioerror, + struct io_comp_batch *iob, bool is_error, void (*complete)(struct io_comp_batch *)) { /* @@ -876,7 +873,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, * 1) No batch container * 2) Has scheduler data attached * 3) Not a passthrough request and end_io set - * 4) Not a passthrough request and an ioerror + * 4) Not a passthrough request and failed with an error */ if (!iob) return false; @@ -885,7 +882,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, if (!blk_rq_is_passthrough(req)) { if (req->end_io) return false; - if (ioerror < 0) + if (is_error) return false; } @@ -921,8 +918,22 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); -void blk_mq_freeze_queue(struct request_queue *q); -void blk_mq_unfreeze_queue(struct request_queue *q); +void blk_mq_freeze_queue_nomemsave(struct request_queue *q); +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); +static inline unsigned int __must_check +blk_mq_freeze_queue(struct request_queue *q) +{ + unsigned int memflags = memalloc_noio_save(); + + blk_mq_freeze_queue_nomemsave(q); + return memflags; +} +static inline void +blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) +{ + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); +} void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, @@ -931,6 +942,8 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); +void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, + struct device *dev, unsigned int offset); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); @@ -987,14 +1000,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } -static inline void blk_rq_bio_prep(struct 
request *rq, struct bio *bio, - unsigned int nr_segs) -{ - rq->nr_phys_segments = nr_segs; - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; -} - void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); @@ -1168,14 +1173,13 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) return max_t(unsigned short, rq->nr_phys_segments, 1); } -int __blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist, struct scatterlist **last_sg); -static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist) +int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, + struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist) { struct scatterlist *last_sg = NULL; - return __blk_rq_map_sg(q, rq, sglist, &last_sg); + return __blk_rq_map_sg(rq, sglist, &last_sg); } void blk_dump_rq_flags(struct request *, char *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 315899779af1..9a1f0ee40b56 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -268,10 +268,16 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +/* + * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) + * however we constrain this to what we can validate and test. + */ +#define BLK_MAX_BLOCK_SIZE SZ_64K + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) { - if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE || !is_power_of_2(bsize)) return -EINVAL; return 0; @@ -332,8 +338,8 @@ typedef unsigned int __bitwise blk_features_t; #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) -/* stacked device can/does support atomic writes */ -#define BLK_FEAT_ATOMIC_WRITES_STACKED \ +/* atomic writes enabled */ +#define BLK_FEAT_ATOMIC_WRITES \ ((__force blk_features_t)(1u << 16)) /* @@ -368,6 +374,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_user_sectors; unsigned int max_segment_size; + unsigned int min_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; unsigned int alignment_offset; @@ -561,8 +568,22 @@ struct request_queue { struct blk_flush_queue *fq; struct list_head flush_list; + /* + * Protects against I/O scheduler switching, particularly when updating + * q->elevator. Since the elevator update code path may also modify q-> + * nr_requests and wbt latency, this lock also protects the sysfs attrs + * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update + * may modify hctx tags, reserved-tags and cpumask, so this lock also + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking + * order during an elevator or nr_hw_queue update, first freeze the + * queue, then acquire ->elevator_lock. + */ + struct mutex elevator_lock; + struct mutex sysfs_lock; - struct mutex sysfs_dir_lock; + /* + * Protects queue limits and also sysfs attribute read_ahead_kb. 
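With the change above, blk_mq_freeze_queue() returns the NOIO memalloc flags saved on entry and blk_mq_unfreeze_queue() restores them, so callers must carry that value across the frozen section. A minimal sketch of the updated calling pattern; the work done while frozen is a placeholder.

#include <linux/blk-mq.h>

static void update_queue_settings(struct request_queue *q)
{
	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);
	/* ... modify queue state while no I/O is in flight ... */
	blk_mq_unfreeze_queue(q, memflags);
}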
+ */ struct mutex limits_lock; /* @@ -582,6 +603,12 @@ struct request_queue { #ifdef CONFIG_LOCKDEP struct task_struct *mq_freeze_owner; int mq_freeze_owner_depth; + /* + * Records disk & queue state in current context, used in unfreeze + * queue + */ + bool mq_freeze_disk_dead; + bool mq_freeze_queue_dying; #endif wait_queue_head_t mq_freeze_wq; /* @@ -600,8 +627,6 @@ struct request_queue { * Serializes all debugfs metadata operations using the above dentries. */ struct mutex debugfs_mutex; - - bool mq_sysfs_init_done; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ @@ -687,23 +712,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) (q->limits.features & BLK_FEAT_ZONED); } -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return disk->nr_zones; -} -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { if (!blk_queue_is_zoned(disk->queue)) @@ -711,11 +719,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return disk_nr_zones(bdev->bd_disk); -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -822,6 +825,51 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) (sb->s_blocksize_bits - SECTOR_SHIFT); } +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); + +/** + * disk_zone_capacity - returns the zone capacity of zone containing @sector + * @disk: disk to work with + * @sector: sector number within the querying zone + * + * Returns the zone capacity of a zone containing @sector. @sector can be any + * sector in the zone. + */ +static inline unsigned int disk_zone_capacity(struct gendisk *disk, + sector_t sector) +{ + sector_t zone_sectors = disk->queue->limits.chunk_sectors; + + if (sector + zone_sectors >= get_capacity(disk)) + return disk->last_zone_capacity; + return disk->zone_capacity; +} +static inline unsigned int bdev_zone_capacity(struct block_device *bdev, + sector_t pos) +{ + return disk_zone_capacity(bdev->bd_disk, pos); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); @@ -939,8 +987,7 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset, * the caller can modify. The caller must call queue_limits_commit_update() * to finish the update. * - * Context: process context. The caller must have frozen the queue or ensured - * that there is outstanding I/O by other means. + * Context: process context. 
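The limits_lock comment above pairs with queue_limits_start_update(), whose kernel-doc requires the caller to finish with queue_limits_commit_update(). A short sketch of that pattern; the particular limit being changed is an assumption for illustration.

#include <linux/blkdev.h>

static int shrink_max_hw_sectors(struct request_queue *q, unsigned int max)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* takes limits_lock */
	lim.max_hw_sectors = max;
	return queue_limits_commit_update(q, &lim); /* validates, applies, unlocks */
}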
*/ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) @@ -1590,6 +1637,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1646,10 +1694,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); @@ -1661,7 +1705,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx(struct path *, struct kstat *, u32); +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1679,8 +1723,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +static inline void bdev_statx(const struct path *path, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index d8a8d245824a..4c506e76a808 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -18,6 +18,8 @@ enum bootmem_type { #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE void __init register_page_bootmem_info_node(struct pglist_data *pgdat); +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, + unsigned long nr_pages); void get_page_bootmem(unsigned long info, struct page *page, enum bootmem_type type); @@ -58,6 +60,11 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { } +static inline void register_page_bootmem_memmap(unsigned long section_nr, + struct page *map, unsigned long nr_pages) +{ +} + static inline void put_page_bootmem(struct page *page) { } diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7fc69083e745..9de7adb68294 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -111,6 +111,7 @@ struct bpf_prog_list { struct bpf_prog *prog; struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; + u32 flags; }; int cgroup_bpf_inherit(struct cgroup *cgrp); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e63dd3443b9..3f0cc89c0622 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -30,6 +30,7 @@ #include <linux/static_call.h> #include <linux/memcontrol.h> #include <linux/cfi.h> +#include <asm/rqspinlock.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -204,6 +205,7 @@ enum btf_field_type { BPF_REFCOUNT = (1 << 9), BPF_WORKQUEUE = (1 << 10), BPF_UPTR = (1 << 11), + BPF_RES_SPIN_LOCK = (1 << 12), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -239,6 +241,7 @@ struct btf_record { u32 cnt; u32 field_mask; int 
spin_lock_off; + int res_spin_lock_off; int timer_off; int wq_off; int refcount_off; @@ -314,6 +317,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return "bpf_spin_lock"; + case BPF_RES_SPIN_LOCK: + return "bpf_res_spin_lock"; case BPF_TIMER: return "bpf_timer"; case BPF_WORKQUEUE: @@ -346,6 +351,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return sizeof(struct bpf_spin_lock); + case BPF_RES_SPIN_LOCK: + return sizeof(struct bpf_res_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); case BPF_WORKQUEUE: @@ -376,6 +383,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return __alignof__(struct bpf_spin_lock); + case BPF_RES_SPIN_LOCK: + return __alignof__(struct bpf_res_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); case BPF_WORKQUEUE: @@ -419,6 +428,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_RB_ROOT: /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: case BPF_TIMER: case BPF_WORKQUEUE: case BPF_KPTR_UNREF: @@ -968,6 +978,7 @@ struct bpf_insn_access_aux { struct { struct btf *btf; u32 btf_id; + u32 ref_obj_id; }; }; struct bpf_verifier_log *log; /* for verbose logs */ @@ -990,6 +1001,21 @@ static inline bool bpf_pseudo_func(const struct bpf_insn *insn) return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } +/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an + * atomic load or store, and false if it is a read-modify-write instruction. + */ +static inline bool +bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn) +{ + switch (atomic_insn->imm) { + case BPF_LOAD_ACQ: + case BPF_STORE_REL: + return true; + default: + return false; + } +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1481,6 +1507,8 @@ struct bpf_ctx_arg_aux { enum bpf_reg_type reg_type; struct btf *btf; u32 btf_id; + u32 ref_obj_id; + bool refcounted; }; struct btf_mod_pair { @@ -1503,11 +1531,12 @@ struct bpf_prog_aux { u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + u32 attach_st_ops_member_off; u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; struct btf *attach_btf; - const struct bpf_ctx_arg_aux *ctx_arg_info; + struct bpf_ctx_arg_aux *ctx_arg_info; void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; @@ -1528,6 +1557,7 @@ struct bpf_prog_aux { bool jits_use_priv_stack; bool priv_stack_requested; bool changes_pkt_data; + bool might_sleep; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; @@ -1547,6 +1577,7 @@ struct bpf_prog_aux { #endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; + const struct bpf_struct_ops *st_ops; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ struct btf_mod_pair *used_btfs; @@ -1945,6 +1976,9 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op #endif +int 
bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, + const struct bpf_ctx_arg_aux *info, u32 cnt); + #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype); @@ -1980,6 +2014,7 @@ struct bpf_array { */ enum { BPF_MAX_LOOPS = 8 * 1024 * 1024, + BPF_MAX_TIMED_LOOPS = 0xffff, }; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ @@ -2036,6 +2071,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void); +const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void); + typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, @@ -2299,6 +2336,14 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); +/* + * The __bpf_map_get() and __btf_get_by_fd() functions parse a file + * descriptor and return a corresponding map or btf object. + * Their names are double underscored to emphasize the fact that they + * do not increase refcnt. To also increase refcnt use corresponding + * bpf_map_get() and btf_get_by_fd() functions. + */ + static inline struct bpf_map *__bpf_map_get(struct fd f) { if (fd_empty(f)) @@ -2308,6 +2353,15 @@ static inline struct bpf_map *__bpf_map_get(struct fd f) return fd_file(f)->private_data; } +static inline struct btf *__btf_get_by_fd(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &btf_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); @@ -2331,7 +2385,7 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, +int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, @@ -2529,7 +2583,7 @@ struct bpf_iter__bpf_map_elem { int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); -bool bpf_iter_prog_supported(struct bpf_prog *prog); +int bpf_iter_prog_supported(struct bpf_prog *prog); const struct bpf_func_proto * bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); @@ -2589,10 +2643,10 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog); + const struct bpf_prog *xdp_prog); int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress); + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress); void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct 
bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -2862,15 +2916,15 @@ struct sk_buff; static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { return 0; } static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { return 0; } diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aefcd6564251..643809cc78c3 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -48,6 +48,11 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func) int bpf_lsm_get_retval_range(const struct bpf_prog *prog, struct bpf_retval_range *range); +int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, + const struct bpf_dynptr *value_p, int flags); +int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str); +bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog); + #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -86,6 +91,19 @@ static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog, { return -EOPNOTSUPP; } +static inline int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, + const struct bpf_dynptr *value_p, int flags) +{ + return -EOPNOTSUPP; +} +static inline int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str) +{ + return -EOPNOTSUPP; +} +static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog) +{ + return false; +} #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 48b7b2eeb7e2..d1f02f8e3e55 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,6 +115,14 @@ struct bpf_reg_state { int depth:30; } iter; + /* For irq stack slots */ + struct { + enum { + IRQ_NATIVE_KFUNC, + IRQ_LOCK_KFUNC, + } kfunc_class; + } irq; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -233,6 +241,7 @@ enum bpf_stack_slot_type { */ STACK_DYNPTR, STACK_ITER, + STACK_IRQ_FLAG, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ @@ -254,8 +263,12 @@ struct bpf_reference_state { * default to pointer reference on zero initialization of a state. */ enum ref_state_type { - REF_TYPE_PTR = 0, - REF_TYPE_LOCK, + REF_TYPE_PTR = (1 << 1), + REF_TYPE_IRQ = (1 << 2), + REF_TYPE_LOCK = (1 << 3), + REF_TYPE_RES_LOCK = (1 << 4), + REF_TYPE_RES_LOCK_IRQ = (1 << 5), + REF_TYPE_LOCK_MASK = REF_TYPE_LOCK | REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, } type; /* Track each reference created with a unique id, even if the same * instruction creates the reference multiple times (eg, via CALL). @@ -315,9 +328,6 @@ struct bpf_func_state { u32 callback_depth; /* The following fields should be last. See copy_func_state() */ - int acquired_refs; - int active_locks; - struct bpf_reference_state *refs; /* The state of the stack. Each element of the array describes BPF_REG_SIZE * (i.e. 8) bytes worth of stack memory. 
* stack[0] represents bytes [*(r10-8)..*(r10-1)] @@ -346,7 +356,11 @@ enum { INSN_F_SPI_MASK = 0x3f, /* 6 bits */ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ + INSN_F_STACK_ACCESS = BIT(9), + + INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */ + INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */ + /* total 12 bits are used now. */ }; static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); @@ -355,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; + u32 prev_idx : 20; + /* special INSN_F_xxx flags */ + u32 flags : 12; /* additional registers that need precision tracking when this * jump is backtracked, vector of six 10-bit records */ @@ -370,6 +384,8 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; struct bpf_verifier_state *parent; + /* Acquired reference states */ + struct bpf_reference_state *refs; /* * 'branches' field is the number of branches left to explore: * 0 - all possible paths from this state reached bpf_exit or @@ -419,14 +435,15 @@ struct bpf_verifier_state { u32 insn_idx; u32 curframe; - bool speculative; + u32 acquired_refs; + u32 active_locks; + u32 active_preempt_locks; + u32 active_irq_id; + u32 active_lock_id; + void *active_lock_ptr; bool active_rcu_lock; - u32 active_preempt_lock; - /* If this state was ever pointed-to by other state's loop_entry field - * this flag would be set to true. Used to avoid freeing such states - * while they are still in use. - */ - bool used_as_loop_entry; + + bool speculative; bool in_sleepable; /* first and last insn idx of this verifier state */ @@ -453,6 +470,11 @@ struct bpf_verifier_state { u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + u32 used_as_loop_entry; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -493,8 +515,10 @@ struct bpf_verifier_state { /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; - struct bpf_verifier_state_list *next; - int miss_cnt, hit_cnt; + struct list_head node; + u32 miss_cnt; + u32 hit_cnt:31; + u32 in_free_list:1; }; struct bpf_loop_inline_state { @@ -584,6 +608,8 @@ struct bpf_insn_aux_data { * accepts callback function as a parameter. */ bool calls_callback; + /* registers alive before this instruction. 
*/ + u16 live_regs_before; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -660,6 +686,7 @@ struct bpf_subprog_info { /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; bool changes_pkt_data: 1; + bool might_sleep: 1; enum priv_stack_mode priv_stack_mode; u8 arg_cnt; @@ -705,8 +732,11 @@ struct bpf_verifier_env { bool test_state_freq; /* test verifier with different pruning frequency */ bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ - struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_verifier_state_list *free_list; + /* Search pruning optimization, array of list_heads for + * lists of struct bpf_verifier_state_list. + */ + struct list_head *explored_states; + struct list_head free_list; /* list of struct bpf_verifier_state_list */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ u32 used_map_cnt; /* number of used maps */ @@ -737,7 +767,11 @@ struct bpf_verifier_env { struct { int *insn_state; int *insn_stack; + /* vector of instruction indexes sorted in post-order */ + int *insn_postorder; int cur_stack; + /* current position in the insn_postorder vector */ + int cur_postorder; } cfg; struct backtrack_state bt; struct bpf_insn_hist_entry *insn_hist; @@ -762,6 +796,8 @@ struct bpf_verifier_env { u32 peak_states; /* longest register parentage chain walked for liveness marking */ u32 longest_mark_read_walk; + u32 free_list_size; + u32 explored_states_size; bpfptr_t fd_array; /* bit mask to keep track of whether a register has been accessed @@ -980,8 +1016,9 @@ const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); const char *iter_state_str(enum bpf_iter_state state); -void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, bool print_all); -void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 2a08a2b55592..ebc0c0c9b944 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -76,6 +76,9 @@ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ #define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ +#define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */ +#define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */ +#define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */ /* * Tag marking a kernel function as a kfunc. 
This is meant to minimize the diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 932139c5d46f..0029ff880e27 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -182,7 +183,6 @@ static inline unsigned long bh_offset(const struct buffer_head *bh) BUG_ON(!PagePrivate(page)); \ ((struct buffer_head *)page_private(page)); \ }) -#define page_has_buffers(page) PagePrivate(page) #define folio_buffers(folio) folio_get_private(folio) void buffer_check_dirty_writeback(struct folio *folio, @@ -223,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -271,7 +273,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); -void block_commit_write(struct page *page, unsigned int from, unsigned int to); +void block_commit_write(struct folio *folio, size_t from, size_t to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); @@ -398,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { diff --git a/include/linux/bug.h b/include/linux/bug.h index 348acf2558f3..a9948a9f1093 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -73,15 +73,23 @@ static inline void generic_bug_clear_once(void) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef CONFIG_PRINTK +void mem_dump_obj(void *object); +#else +static inline void mem_dump_obj(void *object) {} +#endif + /* * Since detected data corruption should stop operation on the affected * structures. Return value must be checked and sanely acted on by caller. */ static inline __must_check bool check_data_corruption(bool v) { return v; } -#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \ +#define CHECK_DATA_CORRUPTION(condition, addr, fmt, ...) 
\ check_data_corruption(({ \ bool corruption = unlikely(condition); \ if (corruption) { \ + if (addr) \ + mem_dump_obj(addr); \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ pr_err(fmt, ##__VA_ARGS__); \ BUG(); \ diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h index 5178b72bc920..eaa7a3f54450 100644 --- a/include/linux/bus/stm32_firewall_device.h +++ b/include/linux/bus/stm32_firewall_device.h @@ -114,27 +114,30 @@ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 su #else /* CONFIG_STM32_FIREWALL */ -int stm32_firewall_get_firewall(struct device_node *np, struct stm32_firewall *firewall, - unsigned int nb_firewall) +static inline int stm32_firewall_get_firewall(struct device_node *np, + struct stm32_firewall *firewall, + unsigned int nb_firewall) { return -ENODEV; } -int stm32_firewall_grant_access(struct stm32_firewall *firewall) +static inline int stm32_firewall_grant_access(struct stm32_firewall *firewall) { return -ENODEV; } -void stm32_firewall_release_access(struct stm32_firewall *firewall) +static inline void stm32_firewall_release_access(struct stm32_firewall *firewall) { } -int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id) +static inline int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, + u32 subsystem_id) { return -ENODEV; } -void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id) +static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, + u32 subsystem_id) { } diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f41c7f0ef91e..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** @@ -184,6 +187,12 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv, ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) +#define for_each_mp_bvec(bvl, bio_vec, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1); \ + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) + /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ { \ @@ -286,12 +295,7 @@ static inline void *bvec_virt(struct bio_vec *bvec) */ static inline phys_addr_t bvec_phys(const struct bio_vec *bvec) { - /* - * Note this open codes page_to_phys because page_to_phys is defined in - * <asm/io.h>, which we don't want to pull in here. If it ever moves to - * a sensible place we should start using it. 
- */ - return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset; + return page_to_phys(bvec->bv_page) + bvec->bv_offset; } #endif /* __LINUX_BVEC_H */ diff --git a/include/linux/cache.h b/include/linux/cache.h index ca2a05682a54..e69768f50d53 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -3,16 +3,13 @@ #define __LINUX_CACHE_H #include <uapi/linux/kernel.h> +#include <vdso/cache.h> #include <asm/cache.h> #ifndef L1_CACHE_ALIGN #define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) #endif -#ifndef SMP_CACHE_BYTES -#define SMP_CACHE_BYTES L1_CACHE_BYTES -#endif - /** * SMP_CACHE_ALIGN - align a value to the L2 cacheline size * @x: value to align @@ -63,10 +60,6 @@ #define __ro_after_init __section(".data..ro_after_init") #endif -#ifndef ____cacheline_aligned -#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#endif - #ifndef ____cacheline_aligned_in_smp #ifdef CONFIG_SMP #define ____cacheline_aligned_in_smp ____cacheline_aligned diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 7ad736538649..c8f4f0a0b874 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -147,7 +147,7 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level) return ci ? ci->id : -1; } -#ifdef CONFIG_ARM64 +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) #define use_arch_cache_info() (true) #else #define use_arch_cache_info() (false) diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb0..13cd6469e7e5 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb + * + * If @once has completed succesfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the * function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? 
-EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ diff --git a/include/linux/capability.h b/include/linux/capability.h index 0c356a517991..1fb08922552c 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -139,7 +139,6 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, } #ifdef CONFIG_MULTIUSER -extern bool has_capability(struct task_struct *t, int cap); extern bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap); extern bool has_capability_noaudit(struct task_struct *t, int cap); @@ -150,10 +149,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); extern bool ns_capable_setid(struct user_namespace *ns, int cap); #else -static inline bool has_capability(struct task_struct *t, int cap) -{ - return true; -} static inline bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap) { diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index caa4b4430634..0bf7d33a1048 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -82,6 +82,14 @@ enum cc_attr { CC_ATTR_GUEST_SEV_SNP, /** + * @CC_ATTR_GUEST_SNP_SECURE_TSC: SNP Secure TSC is active. + * + * The platform/OS is running as a guest/virtual machine and actively + * using AMD SEV-SNP Secure TSC feature. + */ + CC_ATTR_GUEST_SNP_SECURE_TSC, + + /** * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. * * The host kernel is running with the necessary features diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 2d7d86f0290d..c7f2c63b3bc3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -504,20 +504,6 @@ struct ceph_mds_request_head_legacy { #define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head_old { - __le16 version; /* struct version */ - __le64 oldest_client_tid; - __le32 mdsmap_epoch; /* on client */ - __le32 flags; /* CEPH_MDS_FLAG_* */ - __u8 num_retry, num_fwd; /* count retry, fwd attempts */ - __le16 num_releases; /* # include cap/lease release records */ - __le32 op; /* mds op code */ - __le32 caller_uid, caller_gid; - __le64 ino; /* use this ino for openc, mkdir, mknod, - etc. 
(if replaying) */ - union ceph_mds_request_args_ext args; -} __attribute__ ((packed)); - struct ceph_mds_request_head { __le16 version; /* struct version */ __le64 oldest_client_tid; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a45..50b14a5661c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/cfi.h b/include/linux/cfi.h index f0df518e11dd..1db17ecbb86c 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,6 +11,8 @@ #include <linux/module.h> #include <asm/cfi.h> +extern bool cfi_warn; + #ifndef cfi_get_offset static inline int cfi_get_offset(void) { diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 17960a1e858d..5bc8f55c8cca 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -619,9 +619,8 @@ struct cgroup_root { */ struct cftype { /* - * By convention, the name should begin with the name of the - * subsystem, followed by a period. Zero length string indicates - * end of cftype array. + * Name of the subsystem is prepended in cgroup_file_name(). + * Zero length string indicates end of cftype array. 
*/ char name[MAX_CFTYPE_NAME]; unsigned long private; @@ -711,6 +710,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_killed)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f8ef47f8a634..e7da3c3b098b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -113,6 +113,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); void cgroup_file_show(struct cgroup_file *cfile, bool show); @@ -343,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { - return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); + return css->flags & CSS_DYING; } static inline void cgroup_get(struct cgroup *cgrp) @@ -689,8 +690,6 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) */ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); void cgroup_rstat_flush(struct cgroup *cgrp); -void cgroup_rstat_flush_hold(struct cgroup *cgrp); -void cgroup_rstat_flush_release(struct cgroup *cgrp); /* * Basic resource stats. diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h new file mode 100644 index 000000000000..dd4869f1d736 --- /dev/null +++ b/include/linux/cgroup_dmem.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _CGROUP_DMEM_H +#define _CGROUP_DMEM_H + +#include <linux/types.h> +#include <linux/llist.h> + +struct dmem_cgroup_pool_state; + +/* Opaque definition of a cgroup region, used internally */ +struct dmem_cgroup_region; + +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) __printf(2,3); +void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region); +int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, + struct dmem_cgroup_pool_state **ret_pool, + struct dmem_cgroup_pool_state **ret_limit_pool); +void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size); +bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, + struct dmem_cgroup_pool_state *test_pool, + bool ignore_low, bool *ret_hit_low); + +void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool); +#else +static inline __printf(2,3) struct dmem_cgroup_region * +dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) 
+{ + return NULL; +} + +static inline void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) +{ } + +static inline int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, + struct dmem_cgroup_pool_state **ret_pool, + struct dmem_cgroup_pool_state **ret_limit_pool) +{ + *ret_pool = NULL; + + if (ret_limit_pool) + *ret_limit_pool = NULL; + + return 0; +} + +static inline void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size) +{ } + +static inline +bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, + struct dmem_cgroup_pool_state *test_pool, + bool ignore_low, bool *ret_hit_low) +{ + return true; +} + +static inline void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool) +{ } + +#endif +#endif /* _CGROUP_DMEM_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 445235487230..3fd0bcbf3080 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -65,6 +65,10 @@ SUBSYS(rdma) SUBSYS(misc) #endif +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +SUBSYS(dmem) +#endif + /* * The following subsystems are not supported on the default hierarchy. */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index ec00e3f7af2b..7e57047e1564 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -212,7 +212,7 @@ const volatile void * __must_check_fn(const volatile void *val) { return val; } #define no_free_ptr(p) \ - ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) + ((typeof(p)) __must_check_fn((__force const volatile void *)__get_and_null(p, NULL))) #define return_ptr(p) return no_free_ptr(p) @@ -291,11 +291,21 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return (void *)(__force unsigned long)*(_exp); } + +#define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ + __DEFINE_GUARD_LOCK_PTR(_name, _T) + +#define DEFINE_CLASS_IS_COND_GUARD(_name) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, true); \ + __DEFINE_GUARD_LOCK_PTR(_name, _T) + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*_T; } + DEFINE_CLASS_IS_GUARD(_name) #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ @@ -375,11 +385,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ if (_T->lock) { _unlock; } \ } \ \ -static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ -{ \ - return (void *)(__force unsigned long)_T->lock; \ -} - +__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) #define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ static inline class_##_name##_t class_##_name##_constructor(_type *l) \ diff --git a/include/linux/clk.h b/include/linux/clk.h index 1dcee6d701e4..b607482ca77e 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -1138,15 +1138,6 @@ static inline void clk_restore_context(void) {} #endif -/* Deprecated. 
Use devm_clk_bulk_get_all_enabled() */ -static inline int __must_check -devm_clk_bulk_get_all_enable(struct device *dev, struct clk_bulk_data **clks) -{ - int ret = devm_clk_bulk_get_all_enabled(dev, clks); - - return ret > 0 ? 0 : ret; -} - /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ static inline int clk_prepare_enable(struct clk *clk) { diff --git a/include/linux/clk/davinci.h b/include/linux/clk/davinci.h index e1d37451e03f..787a81116b00 100644 --- a/include/linux/clk/davinci.h +++ b/include/linux/clk/davinci.h @@ -12,12 +12,6 @@ #include <linux/regmap.h> /* function for registering clocks in early boot */ - -#ifdef CONFIG_ARCH_DAVINCI_DA830 -int da830_pll_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -#endif -#ifdef CONFIG_ARCH_DAVINCI_DA850 int da850_pll0_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -#endif #endif /* __LINUX_CLK_DAVINCI_PLL_H___ */ diff --git a/include/linux/cma.h b/include/linux/cma.h index d15b64f51336..62d9c1cf6326 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -40,6 +40,9 @@ static inline int __init cma_declare_contiguous(phys_addr_t base, return cma_declare_contiguous_nid(base, size, limit, alignment, order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); } +extern int __init cma_declare_contiguous_multi(phys_addr_t size, + phys_addr_t align, unsigned int order_per_bit, + const char *name, struct cma **res_cma, int nid); extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, @@ -50,12 +53,14 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); +extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); extern void cma_reserve_pages_on_error(struct cma *cma); #ifdef CONFIG_CMA struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); bool cma_free_folio(struct cma *cma, const struct folio *folio); +bool cma_validate_zones(struct cma *cma); #else static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) { @@ -66,6 +71,10 @@ static inline bool cma_free_folio(struct cma *cma, const struct folio *folio) { return false; } +static inline bool cma_validate_zones(struct cma *cma) +{ + return false; +} #endif #endif diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b370..5f2b9a1f722c 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + int (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); @@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align); void codetag_free_module_sections(struct module *mod); void codetag_module_replaced(struct module *mod, struct module *new_mod); -void codetag_load_module(struct module *mod); +int 
codetag_load_module(struct module *mod); void codetag_unload_module(struct module *mod); #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ @@ -103,7 +103,7 @@ codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align) { return NULL; } static inline void codetag_free_module_sections(struct module *mod) {} static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} -static inline void codetag_load_module(struct module *mod) {} +static inline int codetag_load_module(struct module *mod) { return 0; } static inline void codetag_unload_module(struct module *mod) {} #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 7bf0c521db63..173d9c07a895 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -95,7 +95,7 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, struct page **page); extern void reset_isolation_suitable(pg_data_t *pgdat); extern bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx); + unsigned long watermark, int highest_zoneidx); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); @@ -113,7 +113,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat) } static inline bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx) + unsigned long watermark, + int highest_zoneidx) { return false; } diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 2e7c2c282f3a..4fc8e26914ad 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -128,3 +128,11 @@ */ #define ASM_INPUT_G "ir" #define ASM_INPUT_RM "r" + +/* + * Declare compiler support for __typeof_unqual__() operator. + * + * Bindgen uses LLVM even if our C compiler is GCC, so we cannot + * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. + */ +#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ >= 19) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c9b58188ec61..32048052c64a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -137,3 +137,11 @@ #if GCC_VERSION < 90100 #undef __alloc_size__ #endif + +/* + * Declare compiler support for __typeof_unqual__() operator. + * + * Bindgen uses LLVM even if our C compiler is GCC, so we cannot + * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. + */ +#define CC_HAS_TYPEOF_UNQUAL (__GNUC__ >= 14) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d004f9b5528d..27725f1ab5ab 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -198,14 +198,66 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* __CHECKER__ */ /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") +#define __is_array(a) (!__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \ + "must be array") + +#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1) +#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \ + "must be byte array") + +/* + * If the "nonstring" attribute isn't available, we have to return true + * so the __must_*() checks pass when "nonstring" isn't supported. 
+ */ +#if __has_attribute(__nonstring__) && defined(__annotated) +#define __is_cstr(a) (!__annotated(a, nonstring)) +#define __is_noncstr(a) (__annotated(a, nonstring)) +#else +#define __is_cstr(a) (true) +#define __is_noncstr(a) (true) +#endif /* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ #define __must_be_cstr(p) \ - __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") + __BUILD_BUG_ON_ZERO_MSG(!__is_cstr(p), \ + "must be C-string (NUL-terminated)") +#define __must_be_noncstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(!__is_noncstr(p), \ + "must be non-C-string (not NUL-terminated)") + +/* + * Use __typeof_unqual__() when available. + * + * XXX: Remove test for __CHECKER__ once + * sparse learns about __typeof_unqual__(). + */ +#if CC_HAS_TYPEOF_UNQUAL && !defined(__CHECKER__) +# define USE_TYPEOF_UNQUAL 1 +#endif + +/* + * Define TYPEOF_UNQUAL() to use __typeof_unqual__() as typeof + * operator when available, to return an unqualified type of the exp. + */ +#if defined(USE_TYPEOF_UNQUAL) +# define TYPEOF_UNQUAL(exp) __typeof_unqual__(exp) +#else +# define TYPEOF_UNQUAL(exp) __typeof__(exp) +#endif #endif /* __KERNEL__ */ +#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +/* + * Force a reference to the external symbol so the compiler generates + * __kcfi_typid. + */ +#define KCFI_REFERENCE(sym) __ADDRESSABLE(sym) +#else +#define KCFI_REFERENCE(sym) +#endif + /** * offset_to_ptr - convert a relative memory offset to an absolute pointer * @off: the address of the 32-bit offset value @@ -308,6 +360,28 @@ static inline void *offset_to_ptr(const int *off) #define statically_true(x) (__builtin_constant_p(x) && (x)) /* + * Similar to statically_true() but produces a constant expression + * + * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(), + * which require their input to be a constant expression and for which + * statically_true() would otherwise fail. + * + * This is a trade-off: const_true() requires all its operands to be + * compile time constants. Else, it would always returns false even on + * the most trivial cases like: + * + * true || non_const_var + * + * On the opposite, statically_true() is able to fold more complex + * tautologies and will return true on expressions such as: + * + * !(non_const_var * 8 % 4) + * + * For the general case, statically_true() is better. + */ +#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false) + +/* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. 
*/ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 981cc3d7e3aa..501cffddc2f4 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -57,7 +57,7 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __user BTF_TYPE_TAG(user) # endif # define __iomem -# define __percpu BTF_TYPE_TAG(percpu) +# define __percpu __percpu_qual BTF_TYPE_TAG(percpu) # define __rcu BTF_TYPE_TAG(rcu) # define __chk_user_ptr(x) (void)0 @@ -349,6 +349,18 @@ struct ftrace_likely_data { #endif /* + * Optional: only supported since gcc >= 15 + * Optional: not supported by Clang + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178 + */ +#ifdef CONFIG_CC_HAS_MULTIDIMENSIONAL_NONSTRING +# define __nonstring_array __attribute__((__nonstring__)) +#else +# define __nonstring_array +#endif + +/* * Apply __counted_by() when the Endianness matches to increase test coverage. */ #ifdef __LITTLE_ENDIAN @@ -360,7 +372,7 @@ struct ftrace_likely_data { #endif /* Do not trap wrapping arithmetic within an annotated function. */ -#ifdef CONFIG_UBSAN_SIGNED_WRAP +#ifdef CONFIG_UBSAN_INTEGER_WRAP # define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) #else # define __signed_wrap @@ -446,11 +458,14 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif -/* Determine if an attribute has been applied to a variable. */ +/* + * Determine if an attribute has been applied to a variable. + * Using __annotated needs to check for __annotated being available, + * or negative tests may fail when annotation cannot be checked. For + * example, see the definition of __is_cstr(). + */ #if __has_builtin(__builtin_has_attribute) #define __annotated(var, attr) __builtin_has_attribute(var, attr) -#else -#define __annotated(var, attr) (false) #endif /* diff --git a/include/linux/component.h b/include/linux/component.h index df4aa75c9e7c..9d6c66401280 100644 --- a/include/linux/component.h +++ b/include/linux/component.h @@ -3,7 +3,7 @@ #define COMPONENT_H #include <linux/stddef.h> - +#include <linux/types.h> struct device; @@ -90,6 +90,8 @@ int component_compare_dev_name(struct device *dev, void *data); void component_master_del(struct device *, const struct component_master_ops *); +bool component_master_is_bound(struct device *parent, + const struct component_master_ops *ops); struct component_match; diff --git a/include/linux/console.h b/include/linux/console.h index eba367bf605d..8f10d0a85bb4 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -633,8 +633,8 @@ extern void console_conditional_schedule(void); extern void console_unblank(void); extern void console_flush_on_panic(enum con_flush_mode mode); extern struct tty_driver *console_device(int *); -extern void console_stop(struct console *); -extern void console_start(struct console *); +extern void console_suspend(struct console *); +extern void console_resume(struct console *); extern int is_console_locked(void); extern int braille_register_console(struct console *, int index, char *console_options, char *braille_options); @@ -648,8 +648,8 @@ static inline void console_sysfs_notify(void) extern bool console_suspend_enabled; /* Suspend and resume console messages over PM events */ -extern void suspend_console(void); -extern void resume_console(void); +extern void console_suspend_all(void); +extern void console_resume_all(void); int mda_console_init(void); diff --git a/include/linux/context_tracking_irq.h 
b/include/linux/context_tracking_irq.h index c50b5670c4a5..197916ee91a4 100644 --- a/include/linux/context_tracking_irq.h +++ b/include/linux/context_tracking_irq.h @@ -10,12 +10,12 @@ void ct_irq_exit_irqson(void); void ct_nmi_enter(void); void ct_nmi_exit(void); #else -static inline void ct_irq_enter(void) { } -static inline void ct_irq_exit(void) { } +static __always_inline void ct_irq_enter(void) { } +static __always_inline void ct_irq_exit(void) { } static inline void ct_irq_enter_irqson(void) { } static inline void ct_irq_exit_irqson(void) { } -static inline void ct_nmi_enter(void) { } -static inline void ct_nmi_exit(void) { } +static __always_inline void ct_nmi_enter(void) { } +static __always_inline void ct_nmi_exit(void) { } #endif #endif diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..76e41805b92d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c13342594278..cfcf6e4707ed 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -71,7 +71,8 @@ enum coresight_dev_subtype_source { enum coresight_dev_subtype_helper { CORESIGHT_DEV_SUBTYPE_HELPER_CATU, - CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI + CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI, + CORESIGHT_DEV_SUBTYPE_HELPER_CTCU, }; /** @@ -172,6 +173,9 @@ struct coresight_desc { * @dest_dev: a @coresight_device representation of the component connected to @src_port. NULL until the device is created * @link: Representation of the connection as a sysfs link. + * @filter_src_fwnode: filter source component's fwnode handle. + * @filter_src_dev: a @coresight_device representation of the component that + needs to be filtered. * * The full connection structure looks like this, where in_conns store * references to same connection as the source device's out_conns. @@ -200,8 +204,10 @@ struct coresight_connection { struct coresight_device *dest_dev; struct coresight_sysfs_link *link; struct coresight_device *src_dev; - atomic_t src_refcnt; - atomic_t dest_refcnt; + struct fwnode_handle *filter_src_fwnode; + struct coresight_device *filter_src_dev; + int src_refcnt; + int dest_refcnt; }; /** @@ -233,7 +239,7 @@ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; atomic_t perf_cs_etm_session_active; - spinlock_t lock; + raw_spinlock_t lock; }; /** @@ -296,7 +302,7 @@ struct coresight_device { /* system configuration and feature lists */ struct list_head feature_csdev_list; struct list_head config_csdev_list; - spinlock_t cscfg_csdev_lock; + raw_spinlock_t cscfg_csdev_lock; void *active_cscfg_ctxt; }; @@ -324,17 +330,29 @@ static struct coresight_dev_list (var) = { \ #define to_coresight_device(d) container_of(d, struct coresight_device, dev) +/** + * struct coresight_path - data needed by enable/disable path + * @path_list: path from source to sink. + * @trace_id: trace_id of the whole path. 
+ */ +struct coresight_path { + struct list_head path_list; + u8 trace_id; +}; + enum cs_mode { CS_MODE_DISABLED, CS_MODE_SYSFS, CS_MODE_PERF, }; +#define coresight_ops(csdev) csdev->ops #define source_ops(csdev) csdev->ops->source_ops #define sink_ops(csdev) csdev->ops->sink_ops #define link_ops(csdev) csdev->ops->link_ops #define helper_ops(csdev) csdev->ops->helper_ops #define ect_ops(csdev) csdev->ops->ect_ops +#define panic_ops(csdev) csdev->ops->panic_ops /** * struct coresight_ops_sink - basic operations for a sink @@ -384,7 +402,7 @@ struct coresight_ops_link { struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode, struct coresight_trace_id_map *id_map); + enum cs_mode mode, struct coresight_path *path); void (*disable)(struct coresight_device *csdev, struct perf_event *event); }; @@ -404,11 +422,24 @@ struct coresight_ops_helper { int (*disable)(struct coresight_device *csdev, void *data); }; + +/** + * struct coresight_ops_panic - Generic device ops for panic handing + * + * @sync : Sync the device register state/trace data + */ +struct coresight_ops_panic { + int (*sync)(struct coresight_device *csdev); +}; + struct coresight_ops { + int (*trace_id)(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink); const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; const struct coresight_ops_helper *helper_ops; + const struct coresight_ops_panic *panic_ops; }; static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, @@ -454,8 +485,11 @@ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) int ret; pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) - return NULL; + if (IS_ERR(pclk)) { + pclk = clk_get(dev, "apb"); + if (IS_ERR(pclk)) + return NULL; + } ret = clk_prepare_enable(pclk); if (ret) { @@ -588,9 +622,14 @@ static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 o } #endif /* CONFIG_64BIT */ +static inline bool coresight_is_device_source(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE); +} + static inline bool coresight_is_percpu_source(struct coresight_device *csdev) { - return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) && + return csdev && coresight_is_device_source(csdev) && (csdev->subtype.source_subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_PROC); } @@ -639,6 +678,10 @@ extern int coresight_enable_sysfs(struct coresight_device *csdev); extern void coresight_disable_sysfs(struct coresight_device *csdev); extern int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); +typedef void (*coresight_timeout_cb_t) (struct csdev_access *, u32, int, int); +extern int coresight_timeout_action(struct csdev_access *csa, u32 offset, + int position, int value, + coresight_timeout_cb_t cb); extern int coresight_claim_device(struct coresight_device *csdev); extern int coresight_claim_device_unlocked(struct coresight_device *csdev); @@ -662,6 +705,7 @@ void coresight_relaxed_write64(struct coresight_device *csdev, void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); extern int coresight_get_cpu(struct device *dev); +extern int coresight_get_static_trace_id(struct device *dev, u32 *id); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); struct coresight_connection * @@ -679,8 +723,10 
@@ coresight_find_output_type(struct coresight_platform_data *pdata, union coresight_dev_subtype subtype); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv); + struct platform_driver *pdev_drv, struct module *owner); void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); +int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink); #endif /* _LINUX_COREISGHT_H */ diff --git a/include/linux/counter.h b/include/linux/counter.h index 426b7d58a438..f208e867dd0f 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -580,6 +580,9 @@ struct counter_array { #define COUNTER_COMP_CEILING(_read, _write) \ COUNTER_COMP_COUNT_U64("ceiling", _read, _write) +#define COUNTER_COMP_COMPARE(_read, _write) \ + COUNTER_COMP_COUNT_U64("compare", _read, _write) + #define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \ { \ .type = COUNTER_COMP_COUNT_MODE, \ diff --git a/include/linux/cper.h b/include/linux/cper.h index 265b0f8fc0b3..0ed60a91eca9 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -89,6 +89,10 @@ enum { #define CPER_NOTIFY_DMAR \ GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Protocol Error Section */ +#define CPER_SEC_CXL_PROT_ERR \ + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ + 0x4B, 0x77, 0x10, 0x48) /* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ /* @@ -601,4 +605,8 @@ void cper_estatus_print(const char *pfx, int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); int cper_estatus_check(const struct acpi_hest_generic_status *estatus); +struct cxl_cper_sec_prot_err; +void cxl_cper_print_prot_err(const char *pfx, + const struct cxl_cper_sec_prot_err *prot_err); + #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index bdcec1732445..1ee8e9e3037d 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,10 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, @@ -147,7 +151,7 @@ static inline int suspend_disable_secondary_cpus(void) } static inline void suspend_enable_secondary_cpus(void) { - return thaw_secondary_cpus(); + thaw_secondary_cpus(); } #else /* !CONFIG_PM_SLEEP_SMP */ diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index 20b5729903d7..2fd7ba75362a 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -32,6 +32,7 @@ struct cpu_rmap { #define CPU_RMAP_DIST_INF 0xffff extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); +extern void cpu_rmap_get(struct cpu_rmap *rmap); extern int cpu_rmap_put(struct cpu_rmap *rmap); extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7fe0981a7e46..7a5b391dcc01 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -144,6 +144,9 @@ struct 
cpufreq_policy { /* Per policy boost enabled flag. */ bool boost_enabled; + /* Per policy boost supported flag. */ + bool boost_supported; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; @@ -210,6 +213,9 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +/* Scope based cleanup macro for cpufreq_policy kobject reference counting */ +DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T)) + static inline bool policy_is_inactive(struct cpufreq_policy *policy) { return cpumask_empty(policy->cpus); @@ -770,18 +776,16 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf); #ifdef CONFIG_CPU_FREQ -int cpufreq_boost_trigger_state(int state); bool cpufreq_boost_enabled(void); -int cpufreq_enable_boost_support(void); -bool policy_has_boost_freq(struct cpufreq_policy *policy); +int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state); /* Find lowest freq at or above target in a table in ascending order */ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, @@ -836,12 +840,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -851,6 +855,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -904,12 +916,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, 
target_freq, @@ -919,6 +931,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -989,12 +1009,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1004,7 +1024,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1013,11 +1043,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1028,29 +1060,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } @@ -1150,23 +1179,14 @@ static inline int 
of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ return 0; } #else -static inline int cpufreq_boost_trigger_state(int state) -{ - return 0; -} static inline bool cpufreq_boost_enabled(void) { return false; } -static inline int cpufreq_enable_boost_support(void) +static inline int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) { - return -EINVAL; -} - -static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) -{ - return false; + return -EOPNOTSUPP; } static inline int @@ -1184,7 +1204,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -extern unsigned int arch_freq_get_on_cpu(int cpu); +extern int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale static __always_inline @@ -1198,7 +1218,6 @@ void arch_set_freq_scale(const struct cpumask *cpus, /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; -extern struct freq_attr *cpufreq_generic_attr[]; int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy); unsigned int cpufreq_generic_get(unsigned int cpu); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a04b73c40173..1987400000b4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -116,7 +116,6 @@ enum cpuhp_state { CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, CPUHP_TRACE_RB_PREPARE, - CPUHP_MM_ZS_PREPARE, CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, @@ -240,6 +239,7 @@ enum cpuhp_state { CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, + CPUHP_AP_KTHREADS_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9278a50d514f..f9a868384083 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -81,7 +81,7 @@ static __always_inline void set_nr_cpu_ids(unsigned int nr) * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated - * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online + * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * @@ -285,35 +285,52 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, } /** - * for_each_cpu - iterate over every cpu in a mask - * @cpu: the (optionally unsigned) integer iterator - * @mask: the cpumask pointer + * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from + * @n+1. If nothing found, wrap around and start from + * the beginning + * @n: the cpu prior to the place to search (i.e. search starts from @n+1) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer * - * After the loop, cpu is >= nr_cpu_ids. + * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src1p & @src2p is empty. */ -#define for_each_cpu(cpu, mask) \ - for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) - -#if NR_CPUS == 1 static __always_inline -unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +unsigned int cpumask_next_and_wrap(int n, const struct cpumask *src1p, + const struct cpumask *src2p) { - cpumask_check(start); + /* -1 is a legal arg here. 
*/ if (n != -1) cpumask_check(n); + return find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p), + small_cpumask_bits, n + 1); +} - /* - * Return the first available CPU when wrapping, or when starting before cpu0, - * since there is only one valid option. - */ - if (wrap && n >= 0) - return nr_cpumask_bits; - - return cpumask_first(mask); +/** + * cpumask_next_wrap - get the next cpu in *src, starting from @n+1. If nothing + * found, wrap around and start from the beginning + * @n: the cpu prior to the place to search (i.e. search starts from @n+1) + * @src: cpumask pointer + * + * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src is empty. + */ +static __always_inline +unsigned int cpumask_next_wrap(int n, const struct cpumask *src) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit_wrap(cpumask_bits(src), small_cpumask_bits, n + 1); } -#else -unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); -#endif + +/** + * for_each_cpu - iterate over every cpu in a mask + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu(cpu, mask) \ + for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location @@ -391,7 +408,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta for_each_set_bit_from(cpu, cpumask_bits(mask), small_cpumask_bits) /** - * cpumask_any_but - return a "random" in a cpumask, but not this one. + * cpumask_any_but - return an arbitrary cpu in a cpumask, but not this one. * @mask: the cpumask to search * @cpu: the cpu to ignore. * @@ -411,7 +428,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_any_and_but - pick a "random" cpu from *mask1 & *mask2, but not this one. + * cpumask_any_and_but - pick an arbitrary cpu from *mask1 & *mask2, but not this one. * @mask1: the first input cpumask * @mask2: the second input cpumask * @cpu: the cpu to ignore @@ -840,7 +857,7 @@ void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) } /** - * cpumask_any - pick a "random" cpu from *srcp + * cpumask_any - pick an arbitrary cpu from *srcp * @srcp: the input cpumask * * Return: >= nr_cpu_ids if no cpus set. 
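A hypothetical usage sketch, not part of the patch: the cpumask.h hunks above replace the old four-argument cpumask_next_wrap(n, mask, start, wrap) with a single-start-point form and add cpumask_next_and_wrap(); a round-robin CPU picker built on the new signatures could look like the following, where demo_pick_cpu() is an illustrative name only.

/*
 * Sketch only: choose the next CPU after @prev that is both online and in
 * @affinity, wrapping to the start of the mask; fall back to any online
 * CPU when the intersection is empty.
 */
static unsigned int demo_pick_cpu(int prev, const struct cpumask *affinity)
{
	unsigned int cpu;

	cpu = cpumask_next_and_wrap(prev, affinity, cpu_online_mask);
	if (cpu >= nr_cpu_ids)
		cpu = cpumask_next_wrap(prev, cpu_online_mask);

	return cpu;
}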
@@ -848,7 +865,7 @@ void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) #define cpumask_any(srcp) cpumask_first(srcp) /** - * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 + * cpumask_any_and - pick an arbitrary cpu from *mask1 & *mask2 * @mask1: the first input cpumask * @mask2: the second input cpumask * @@ -1033,17 +1050,26 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) + +#define for_each_possible_cpu_wrap(cpu, start) \ + for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++) +#define for_each_online_cpu_wrap(cpu, start) \ + for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++) #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) + +#define for_each_possible_cpu_wrap(cpu, start) \ + for_each_cpu_wrap((cpu), cpu_possible_mask, (start)) +#define for_each_online_cpu_wrap(cpu, start) \ + for_each_cpu_wrap((cpu), cpu_online_mask, (start)) #endif /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); -void init_cpu_online(const struct cpumask *src); #define assign_cpu(cpu, mask, val) \ assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 835e7b793f6a..5466c96a33db 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -125,9 +125,11 @@ static inline int cpuset_do_page_mem_spread(void) extern bool current_cpuset_is_being_rebound(void); +extern void dl_rebuild_rd_accounting(void); extern void rebuild_sched_domains(void); extern void cpuset_print_current_mems_allowed(void); +extern void cpuset_reset_sched_domains(void); /* * read_mems_allowed_begin is required when making decisions involving @@ -259,11 +261,20 @@ static inline bool current_cpuset_is_being_rebound(void) return false; } +static inline void dl_rebuild_rd_accounting(void) +{ +} + static inline void rebuild_sched_domains(void) { partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_reset_sched_domains(void) +{ + partition_sched_domains(1, NULL, NULL); +} + static inline void cpuset_print_current_mems_allowed(void) { } diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index acc55626afdc..2f2555e6407c 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -20,6 +20,8 @@ extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); extern void elfcorehdr_free(unsigned long long addr); extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos); extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size); extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); @@ -99,6 +101,12 @@ static inline void vmcore_unusable(void) * indicated in the vmcore instead. For example, a ballooned page * contains no data and reading from such a page will cause high * load in the hypervisor. 
+ * @get_device_ram: query RAM ranges that can only be detected by device + * drivers, such as the virtio-mem driver, so they can be included in + * the crash dump on architectures that allocate the elfcore hdr in the dump + * ("2nd") kernel. Indicated RAM ranges may contain holes to reduce the + * total number of ranges; such holes can be detected using the pfn_is_ram + * callback just like for other RAM. * @next: List head to manage registered callbacks internally; initialized by * register_vmcore_cb(). * @@ -109,11 +117,44 @@ static inline void vmcore_unusable(void) */ struct vmcore_cb { bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn); + int (*get_device_ram)(struct vmcore_cb *cb, struct list_head *list); struct list_head next; }; extern void register_vmcore_cb(struct vmcore_cb *cb); extern void unregister_vmcore_cb(struct vmcore_cb *cb); +struct vmcore_range { + struct list_head list; + unsigned long long paddr; + unsigned long long size; + loff_t offset; +}; + +/* Allocate a vmcore range and add it to the list. */ +static inline int vmcore_alloc_add_range(struct list_head *list, + unsigned long long paddr, unsigned long long size) +{ + struct vmcore_range *m = kzalloc(sizeof(*m), GFP_KERNEL); + + if (!m) + return -ENOMEM; + m->paddr = paddr; + m->size = size; + list_add_tail(&m->list, list); + return 0; +} + +/* Free a list of vmcore ranges. */ +static inline void vmcore_free_ranges(struct list_head *list) +{ + struct vmcore_range *m, *tmp; + + list_for_each_entry_safe(m, tmp, list, list) { + list_del(&m->list); + kfree(m); + } +} + #else /* !CONFIG_CRASH_DUMP */ static inline bool is_kdump_kernel(void) { return false; } #endif /* CONFIG_CRASH_DUMP */ diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 5a9df944fb80..1fe7e7d1b214 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -32,13 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, #define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() #endif -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high); +void __init reserve_crashkernel_generic(unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); #else -static inline void __init reserve_crashkernel_generic(char *cmdline, +static inline void __init reserve_crashkernel_generic( unsigned long long crash_size, unsigned long long crash_base, unsigned long long crash_low_size, diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index 6bb0c0bf357b..a559fdff3f7e 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -4,13 +4,19 @@ #include <linux/types.h> -#define CRC_T10DIF_DIGEST_SIZE 2 -#define CRC_T10DIF_BLOCK_SIZE 1 -#define CRC_T10DIF_STRING "crct10dif" +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len); +u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); -extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, - size_t len); -extern __u16 crc_t10dif(unsigned char const *, size_t); -extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); +static inline u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)) + return crc_t10dif_arch(crc, p, len); + return crc_t10dif_generic(crc, p, len); +} + +static inline u16 crc_t10dif(const u8 *p, size_t len) +{ + return crc_t10dif_update(0, p, len); 
+} #endif diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 87f788c0d607..69c2e8bb3782 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -8,10 +8,48 @@ #include <linux/types.h> #include <linux/bitrev.h> -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len); +u32 crc32_le_base(u32 crc, const u8 *p, size_t len); +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len); +u32 crc32_be_base(u32 crc, const u8 *p, size_t len); +u32 crc32c_arch(u32 crc, const u8 *p, size_t len); +u32 crc32c_base(u32 crc, const u8 *p, size_t len); + +static inline u32 crc32_le(u32 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_le_arch(crc, p, len); + return crc32_le_base(crc, p, len); +} + +static inline u32 crc32_be(u32 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_be_arch(crc, p, len); + return crc32_be_base(crc, p, len); +} + +static inline u32 crc32c(u32 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32c_arch(crc, p, len); + return crc32c_base(crc, p, len); +} + +/* + * crc32_optimizations() returns flags that indicate which CRC32 library + * functions are using architecture-specific optimizations. Unlike + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 + * variants and also whether any needed CPU features are available at runtime. + */ +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ +#define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */ +#if IS_ENABLED(CONFIG_CRC32_ARCH) +u32 crc32_optimizations(void); +#else +static inline u32 crc32_optimizations(void) { return 0; } +#endif /** * crc32_le_combine - Combine two crc32 check values into one. For two @@ -31,39 +69,34 @@ u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); * with the same initializer as crc1, and crc2 seed was 0. See * also crc32_combine_test(). */ -u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len); +u32 crc32_le_shift(u32 crc, size_t len); static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len); +u32 crc32c_shift(u32 crc, size_t len); /** - * __crc32c_le_combine - Combine two crc32c check values into one. For two - * sequences of bytes, seq1 and seq2 with lengths len1 - * and len2, __crc32c_le() check values were calculated - * for each, crc1 and crc2. + * crc32c_combine - Combine two crc32c check values into one. For two sequences + * of bytes, seq1 and seq2 with lengths len1 and len2, crc32c() + * check values were calculated for each, crc1 and crc2. * * @crc1: crc32c of the first block * @crc2: crc32c of the second block * @len2: length of the second block * - * Return: The __crc32c_le() check value of seq1 and seq2 concatenated, - * requiring only crc1, crc2, and len2. 
Note: If seq_full denotes - * the concatenated memory area of seq1 with seq2, and crc_full - * the __crc32c_le() value of seq_full, then crc_full == - * __crc32c_le_combine(crc1, crc2, len2) when crc_full was - * seeded with the same initializer as crc1, and crc2 seed - * was 0. See also crc32c_combine_test(). + * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring + * only crc1, crc2, and len2. Note: If seq_full denotes the concatenated + * memory area of seq1 with seq2, and crc_full the crc32c() value of + * seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when + * crc_full was seeded with the same initializer as crc1, and crc2 seed + * was 0. See also crc_combine_test(). */ -u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len); - -static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) +static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2) { - return __crc32c_le_shift(crc1, len2) ^ crc2; + return crc32c_shift(crc1, len2) ^ crc2; } #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 357ae4611a45..b8cff2f4309a 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -2,11 +2,6 @@ #ifndef _LINUX_CRC32C_H #define _LINUX_CRC32C_H -#include <linux/types.h> - -extern u32 crc32c(u32 crc, const void *address, unsigned int length); - -/* This macro exists for backwards-compatibility. */ -#define crc32c_le crc32c +#include <linux/crc32.h> #endif /* _LINUX_CRC32C_H */ diff --git a/include/linux/crc64.h b/include/linux/crc64.h index e044c60d1e61..41de30b907df 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -7,12 +7,40 @@ #include <linux/types.h> -#define CRC64_ROCKSOFT_STRING "crc64-rocksoft" +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len); +u64 crc64_be_generic(u64 crc, const u8 *p, size_t len); +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len); +u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len); -u64 __pure crc64_be(u64 crc, const void *p, size_t len); -u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); +/** + * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 + * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, + * or the previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + */ +static inline u64 crc64_be(u64 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC64_ARCH)) + return crc64_be_arch(crc, p, len); + return crc64_be_generic(crc, p, len); +} -u64 crc64_rocksoft(const unsigned char *buffer, size_t len); -u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len); +/** + * crc64_nvme - Calculate CRC64-NVME + * @crc: seed value for computation. 0 for a new CRC calculation, or the + * previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + * + * This computes the CRC64 defined in the NVME NVM Command Set Specification, + * *including the bitwise inversion at the beginning and end*. 
+ */ +static inline u64 crc64_nvme(u64 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC64_ARCH)) + return ~crc64_nvme_arch(~crc, p, len); + return ~crc64_nvme_generic(~crc, p, len); +} #endif /* _LINUX_CRC64_H */ diff --git a/include/linux/crc7.h b/include/linux/crc7.h index b462842f3c32..61d34749e437 100644 --- a/include/linux/crc7.h +++ b/include/linux/crc7.h @@ -3,13 +3,6 @@ #define _LINUX_CRC7_H #include <linux/types.h> -extern const u8 crc7_be_syndrome_table[256]; - -static inline u8 crc7_be_byte(u8 crc, u8 data) -{ - return crc7_be_syndrome_table[crc ^ data]; -} - extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len); #endif diff --git a/include/linux/cred.h b/include/linux/cred.h index 1e1ec8834e45..5658a3bfe803 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -155,8 +155,6 @@ extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *); -extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); @@ -172,48 +170,17 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } -/* - * Override creds without bumping reference count. Caller must ensure - * reference remains valid or has taken reference. Almost always not the - * interface you want. Use override_creds()/revert_creds() instead. - */ -static inline const struct cred *override_creds_light(const struct cred *override_cred) +static inline const struct cred *override_creds(const struct cred *override_cred) { return rcu_replace_pointer(current->cred, override_cred, 1); } -static inline const struct cred *revert_creds_light(const struct cred *revert_cred) +static inline const struct cred *revert_creds(const struct cred *revert_cred) { return rcu_replace_pointer(current->cred, revert_cred, 1); } /** - * get_new_cred_many - Get references on a new set of credentials - * @cred: The new credentials to reference - * @nr: Number of references to acquire - * - * Get references on the specified set of new credentials. The caller must - * release all acquired references. - */ -static inline struct cred *get_new_cred_many(struct cred *cred, int nr) -{ - atomic_long_add(nr, &cred->usage); - return cred; -} - -/** - * get_new_cred - Get a reference on a new set of credentials - * @cred: The new credentials to reference - * - * Get a reference on the specified set of new credentials. The caller must - * release the reference. 
- */ -static inline struct cred *get_new_cred(struct cred *cred) -{ - return get_new_cred_many(cred, 1); -} - -/** * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference * @nr: Number of references to acquire @@ -233,7 +200,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) if (!cred) return cred; nonconst_cred->non_rcu = 0; - return get_new_cred_many(nonconst_cred, nr); + atomic_long_add(nr, &nonconst_cred->usage); + return cred; } /* diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b164da5e129e..1e3809d28abd 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -13,6 +13,8 @@ #define _LINUX_CRYPTO_H #include <linux/completion.h> +#include <linux/errno.h> +#include <linux/list.h> #include <linux/refcount.h> #include <linux/slab.h> #include <linux/types.h> @@ -22,7 +24,6 @@ */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 @@ -124,6 +125,9 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 +/* Set if the algorithm supports request chains and virtual addresses. */ +#define CRYPTO_ALG_REQ_CHAIN 0x00040000 + /* * Transform masks and values (for crt_flags). */ @@ -133,6 +137,7 @@ #define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100 #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 +#define CRYPTO_TFM_REQ_ON_STACK 0x00000800 /* * Miscellaneous stuff. @@ -174,6 +179,7 @@ struct crypto_async_request { struct crypto_tfm *tfm; u32 flags; + int err; }; /** @@ -239,26 +245,7 @@ struct cipher_alg { void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -/** - * struct compress_alg - compression/decompression algorithm - * @coa_compress: Compress a buffer of specified length, storing the resulting - * data in the specified buffer. Return the length of the - * compressed data in dlen. - * @coa_decompress: Decompress the source buffer, storing the uncompressed - * data in the specified buffer. The length of the data is - * returned in dlen. - * - * All fields are mandatory. - */ -struct compress_alg { - int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen); - int (*coa_decompress)(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen); -}; - #define cra_cipher cra_u.cipher -#define cra_compress cra_u.compress /** * struct crypto_alg - definition of a cryptograpic cipher algorithm @@ -309,7 +296,7 @@ struct compress_alg { * transformation types. There are multiple options, such as * &crypto_skcipher_type, &crypto_ahash_type, &crypto_rng_type. * This field might be empty. In that case, there are no common - * callbacks. This is the case for: cipher, compress, shash. + * callbacks. This is the case for: cipher. * @cra_u: Callbacks implementing the transformation. This is a union of * multiple structures. Depending on the type of transformation selected * by @cra_type and @cra_flags above, the associated structure must be @@ -328,8 +315,6 @@ struct compress_alg { * @cra_init. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. - * @cra_u.compress: Union member which contains a (de)compression algorithm. - * See @struct @compress_alg. * @cra_module: Owner of this transformation implementation. 
Set to THIS_MODULE * @cra_list: internally used * @cra_users: internally used @@ -359,7 +344,6 @@ struct crypto_alg { union { struct cipher_alg cipher; - struct compress_alg compress; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); @@ -433,10 +417,6 @@ struct crypto_tfm { void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; -struct crypto_comp { - struct crypto_tfm base; -}; - /* * Transform user interface. */ @@ -493,52 +473,23 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_comp *)tfm; -} - -static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_COMPRESS; - mask |= CRYPTO_ALG_TYPE_MASK; - - return __crypto_comp_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm) +static inline void crypto_reqchain_init(struct crypto_async_request *req) { - return &tfm->base; + req->err = -EINPROGRESS; + INIT_LIST_HEAD(&req->list); } -static inline void crypto_free_comp(struct crypto_comp *tfm) +static inline void crypto_request_chain(struct crypto_async_request *req, + struct crypto_async_request *head) { - crypto_free_tfm(crypto_comp_tfm(tfm)); + req->err = -EINPROGRESS; + list_add_tail(&req->list, &head->list); } -static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) +static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) { - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_COMPRESS; - mask |= CRYPTO_ALG_TYPE_MASK; - - return crypto_has_alg(alg_name, type, mask); + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; } -static inline const char *crypto_comp_name(struct crypto_comp *tfm) -{ - return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); -} - -int crypto_comp_compress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); - -int crypto_comp_decompress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); - #endif /* _LINUX_CRYPTO_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index a67f2c4940e9..47e36e6ea203 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -36,6 +36,16 @@ struct damon_addr_range { }; /** + * struct damon_size_range - Represents size for filter to operate on [@min, @max]. + * @min: Min size (inclusive). + * @max: Max size (inclusive). + */ +struct damon_size_range { + unsigned long min; + unsigned long max; +}; + +/** * struct damon_region - Represents a monitoring target region. * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. @@ -193,11 +203,16 @@ struct damos_quota_goal { * size quota is set, DAMON tries to apply the action only up to &sz bytes * within &reset_interval. * - * Internally, the time quota is transformed to a size quota using estimated - * throughput of the scheme's action. DAMON then compares it against &sz and - * uses smaller one as the effective quota. + * To convince the different types of quotas and goals, DAMON internally + * converts those into one single size quota called "effective quota". DAMON + * internally uses it as the only one real quota. The conversion is made as + * follows. + * + * The time quota is transformed to a size quota using estimated throughput of + * the scheme's action. 
DAMON then compares it against &sz and uses smaller + * one as the effective quota. * - * If @goals is not empt, DAMON calculates yet another size quota based on the + * If @goals is not empty, DAMON calculates yet another size quota based on the * goals using its internal feedback loop algorithm, for every @reset_interval. * Then, if the new size quota is smaller than the effective quota, it uses the * new size quota as the effective quota. @@ -286,21 +301,44 @@ struct damos_watermarks { * @sz_tried: Total size of regions that the scheme is tried to be applied. * @nr_applied: Total number of regions that the scheme is applied. * @sz_applied: Total size of regions that the scheme is applied. + * @sz_ops_filter_passed: + * Total bytes that passed ops layer-handled DAMOS filters. * @qt_exceeds: Total number of times the quota of the scheme has exceeded. + * + * "Tried an action to a region" in this context means the DAMOS core logic + * determined the region as eligible to apply the action. The access pattern + * (&struct damos_access_pattern), quotas (&struct damos_quota), watermarks + * (&struct damos_watermarks) and filters (&struct damos_filter) that handled + * on core logic can affect this. The core logic asks the operation set + * (&struct damon_operations) to apply the action to the region. + * + * "Applied an action to a region" in this context means the operation set + * (&struct damon_operations) successfully applied the action to the region, at + * least to a part of the region. The filters (&struct damos_filter) that + * handled on operation set layer and type of the action and pages of the + * region can affect this. For example, if a filter is set to exclude + * anonymous pages and the region has only anonymous pages, the region will be + * failed at applying the action. If the action is &DAMOS_PAGEOUT and all + * pages of the region are already paged out, the region will be failed at + * applying the action. */ struct damos_stat { unsigned long nr_tried; unsigned long sz_tried; unsigned long nr_applied; unsigned long sz_applied; + unsigned long sz_ops_filter_passed; unsigned long qt_exceeds; }; /** * enum damos_filter_type - Type of memory for &struct damos_filter * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. + * @DAMOS_FILTER_TYPE_ACTIVE: Active pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. + * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage. + * @DAMOS_FILTER_TYPE_UNMAPPED: Unmapped pages. * @DAMOS_FILTER_TYPE_ADDR: Address range. * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. @@ -318,8 +356,11 @@ struct damos_stat { */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, + DAMOS_FILTER_TYPE_ACTIVE, DAMOS_FILTER_TYPE_MEMCG, DAMOS_FILTER_TYPE_YOUNG, + DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, + DAMOS_FILTER_TYPE_UNMAPPED, DAMOS_FILTER_TYPE_ADDR, DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, @@ -327,31 +368,61 @@ enum damos_filter_type { /** * struct damos_filter - DAMOS action target memory filter. - * @type: Type of the page. - * @matching: If the matching page should filtered out or in. + * @type: Type of the target memory. + * @matching: Whether this is for @type-matching memory. + * @allow: Whether to include or exclude the @matching memory. * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. * @addr_range: Address range if @type is DAMOS_FILTER_TYPE_ADDR. 
* @target_idx: Index of the &struct damon_target of * &damon_ctx->adaptive_targets if @type is * DAMOS_FILTER_TYPE_TARGET. + * @sz_range: Size range if @type is DAMOS_FILTER_TYPE_HUGEPAGE_SIZE. * @list: List head for siblings. * * Before applying the &damos->action to a memory region, DAMOS checks if each - * page of the region matches to this and avoid applying the action if so. - * Support of each filter type depends on the running &struct damon_operations - * and the type. Refer to &enum damos_filter_type for more detai. + * byte of the region matches to this given condition and avoid applying the + * action if so. Support of each filter type depends on the running &struct + * damon_operations and the type. Refer to &enum damos_filter_type for more + * details. */ struct damos_filter { enum damos_filter_type type; bool matching; + bool allow; union { unsigned short memcg_id; struct damon_addr_range addr_range; int target_idx; + struct damon_size_range sz_range; }; struct list_head list; }; +struct damon_ctx; +struct damos; + +/** + * struct damos_walk_control - Control damos_walk(). + * + * @walk_fn: Function to be called back for each region. + * @data: Data that will be passed to walk functions. + * + * Control damos_walk(), which requests specific kdamond to invoke the given + * function to each region that eligible to apply actions of the kdamond's + * schemes. Refer to damos_walk() for more details. + */ +struct damos_walk_control { + void (*walk_fn)(void *data, struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *s, unsigned long sz_filter_passed); + void *data; +/* private: internal use only */ + /* informs if the kdamond finished handling of the walk request */ + struct completion completion; + /* informs if the walk is canceled. */ + bool canceled; +}; + /** * struct damos_access_pattern - Target access pattern of the given scheme. * @min_sz_region: Minimum size of target regions. @@ -379,6 +450,8 @@ struct damos_access_pattern { * @wmarks: Watermarks for automated (in)activation of this scheme. * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. + * @ops_filters: ops layer handling &struct damos_filter objects list. + * @last_applied: Last @action applied ops-managing entity. * @stat: Statistics of this scheme. * @list: List head for siblings. * @@ -401,6 +474,15 @@ struct damos_access_pattern { * implementation could check pages of the region and skip &action to respect * &filters * + * The minimum entity that @action can be applied depends on the underlying + * &struct damon_operations. Since it may not be aligned with the core layer + * abstract, namely &struct damon_region, &struct damon_operations could apply + * @action to same entity multiple times. Large folios that underlying on + * multiple &struct damon region objects could be such examples. The &struct + * damon_operations can use @last_applied to avoid that. DAMOS core logic + * unsets @last_applied when each regions walking for applying the scheme is + * finished. + * * After applying the &action to each region, &stat_count and &stat_sz is * updated to reflect the number of regions and total size of regions that the * &action is applied. 
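A hypothetical usage sketch, not part of the patch, for the damos_walk() interface added in this damon.h hunk; demo_walk_fn() and demo_report_regions() are illustrative names, and damos_walk() itself is declared further down in this diff.

/* Sketch only: log each region a scheme is eligible to act on. */
static void demo_walk_fn(void *data, struct damon_ctx *ctx,
		struct damon_target *t, struct damon_region *r,
		struct damos *s, unsigned long sz_filter_passed)
{
	pr_info("region [%lu, %lu): %lu bytes passed ops filters\n",
		r->ar.start, r->ar.end, sz_filter_passed);
}

static int demo_report_regions(struct damon_ctx *ctx)
{
	struct damos_walk_control control = {
		.walk_fn = demo_walk_fn,
	};

	return damos_walk(ctx, &control);
}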
@@ -415,6 +497,16 @@ struct damos { * @action */ unsigned long next_apply_sis; + /* informs if ongoing DAMOS walk for this scheme is finished */ + bool walk_completed; + /* + * If the current region in the filtering stage is allowed by core + * layer-handled filters. If true, operations layer allows it, too. + */ + bool core_filters_allowed; + /* whether to reject core/ops filters umatched regions */ + bool core_filters_default_reject; + bool ops_filters_default_reject; /* public: */ struct damos_quota quota; struct damos_watermarks wmarks; @@ -422,6 +514,8 @@ struct damos { int target_nid; }; struct list_head filters; + struct list_head ops_filters; + void *last_applied; struct damos_stat stat; struct list_head list; }; @@ -442,8 +536,6 @@ enum damon_ops_id { NR_DAMON_OPS, }; -struct damon_ctx; - /** * struct damon_operations - Monitoring operations for given use cases. * @@ -452,7 +544,6 @@ struct damon_ctx; * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. - * @reset_aggregated: Reset aggregated accesses monitoring results. * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. @@ -464,8 +555,7 @@ struct damon_ctx; * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting * the monitoring, @update after each &damon_attrs.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each - * &damon_attrs.sample_interval. Finally, @reset_aggregated is called after - * each &damon_attrs.aggr_interval. + * &damon_attrs.sample_interval. * * Each &struct damon_operations instance having valid @id can be registered * via damon_register_ops() and selected by damon_select_ops() later. @@ -480,14 +570,13 @@ struct damon_ctx; * last preparation and update the number of observed accesses of each region. * It should also return max number of observed accesses that made as a result * of its update. The value will be used for regions adjustment threshold. - * @reset_aggregated should reset the access monitoring results that aggregated - * by @check_accesses. * @get_scheme_score should return the priority score of a region for a scheme * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided * DAMON-based operation scheme is found. It should apply the scheme's action * to the region and return bytes of the region that the action is successfully - * applied. + * applied. It should also report how many bytes of the region has passed + * filters (&struct damos_filter) that handled by itself. * @target_valid should check whether the target is still valid for the * monitoring. * @cleanup is called from @kdamond just before its termination. 
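To illustrate the revised @apply_scheme contract described above, a minimal hypothetical skeleton under the new prototype (shown in the next hunk); it assumes an operations set that lets the whole region pass its ops-layer filters and applies the action to all of it.

/*
 * Sketch only: report every byte of the region as passing the ops-layer
 * filters and as successfully acted on.
 */
static unsigned long demo_apply_scheme(struct damon_ctx *context,
		struct damon_target *t, struct damon_region *r,
		struct damos *scheme, unsigned long *sz_filter_passed)
{
	*sz_filter_passed = damon_sz_region(r);
	return damon_sz_region(r);
}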
@@ -498,13 +587,12 @@ struct damon_operations { void (*update)(struct damon_ctx *context); void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); - void (*reset_aggregated)(struct damon_ctx *context); int (*get_scheme_score)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); unsigned long (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, - struct damos *scheme); + struct damos *scheme, unsigned long *sz_filter_passed); bool (*target_valid)(struct damon_target *t); void (*cleanup)(struct damon_ctx *context); }; @@ -512,52 +600,84 @@ struct damon_operations { /** * struct damon_callback - Monitoring events notification callbacks. * - * @before_start: Called before starting the monitoring. * @after_wmarks_check: Called after each schemes' watermarks check. - * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. - * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. - * @private: User private data. * - * The monitoring thread (&damon_ctx.kdamond) calls @before_start and - * @before_terminate just before starting and finishing the monitoring, - * respectively. Therefore, those are good places for installing and cleaning - * @private. + * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just + * before finishing the monitoring. * * The monitoring thread calls @after_wmarks_check after each DAMON-based * operation schemes' watermarks check. If users need to make changes to the * attributes of the monitoring context while it's deactivated due to the * watermarks, this is the good place to do. * - * The monitoring thread calls @after_sampling and @after_aggregation for each - * of the sampling intervals and aggregation intervals, respectively. - * Therefore, users can safely access the monitoring results without additional - * protection. For the reason, users are recommended to use these callback for - * the accesses to the results. + * The monitoring thread calls @after_aggregation for each of the aggregation + * intervals. Therefore, users can safely access the monitoring results + * without additional protection. For the reason, users are recommended to use + * these callback for the accesses to the results. * * If any callback returns non-zero, monitoring stops. */ struct damon_callback { - void *private; - - int (*before_start)(struct damon_ctx *context); int (*after_wmarks_check)(struct damon_ctx *context); - int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); - int (*before_damos_apply)(struct damon_ctx *context, - struct damon_target *target, - struct damon_region *region, - struct damos *scheme); void (*before_terminate)(struct damon_ctx *context); }; +/* + * struct damon_call_control - Control damon_call(). + * + * @fn: Function to be called back. + * @data: Data that will be passed to @fn. + * @return_code: Return code from @fn invocation. + * + * Control damon_call(), which requests specific kdamond to invoke a given + * function. Refer to damon_call() for more details. 
+ */ +struct damon_call_control { + int (*fn)(void *data); + void *data; + int return_code; +/* private: internal use only */ + /* informs if the kdamond finished handling of the request */ + struct completion completion; + /* informs if the kdamond canceled @fn invocation */ + bool canceled; +}; + +/** + * struct damon_intervals_goal - Monitoring intervals auto-tuning goal. + * + * @access_bp: Access events observation ratio to achieve in bp. + * @aggrs: Number of aggregations to achieve @access_bp within. + * @min_sample_us: Minimum resulting sampling interval in microseconds. + * @max_sample_us: Maximum resulting sampling interval in microseconds. + * + * DAMON automatically tunes &damon_attrs->sample_interval and + * &damon_attrs->aggr_interval aiming the ratio in bp (1/10,000) of + * DAMON-observed access events to theoretical maximum amount within @aggrs + * aggregations be same to @access_bp. The logic increases + * &damon_attrs->aggr_interval and &damon_attrs->sample_interval in same + * ratio if the current access events observation ratio is lower than the + * target for each @aggrs aggregations, and vice versa. + * + * If @aggrs is zero, the tuning is disabled and hence this struct is ignored. + */ +struct damon_intervals_goal { + unsigned long access_bp; + unsigned long aggrs; + unsigned long min_sample_us; + unsigned long max_sample_us; +}; + /** * struct damon_attrs - Monitoring attributes for accuracy/overhead control. * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. * @ops_update_interval: The time between monitoring operations updates. + * @intervals_goal: Intervals auto-tuning goal. * @min_nr_regions: The minimum number of adaptive monitoring * regions. * @max_nr_regions: The maximum number of adaptive monitoring @@ -577,8 +697,20 @@ struct damon_attrs { unsigned long sample_interval; unsigned long aggr_interval; unsigned long ops_update_interval; + struct damon_intervals_goal intervals_goal; unsigned long min_nr_regions; unsigned long max_nr_regions; +/* private: internal use only */ + /* + * @aggr_interval to @sample_interval ratio. + * Core-external components call damon_set_attrs() with &damon_attrs + * that this field is unset. In the case, damon_set_attrs() sets this + * field of resulting &damon_attrs. Core-internal components such as + * kdamond_tune_intervals() calls damon_set_attrs() with &damon_attrs + * that this field is set. In the case, damon_set_attrs() just keeps + * it. 
+ */ + unsigned long aggr_samples; }; /** @@ -627,11 +759,22 @@ struct damon_ctx { * update */ unsigned long next_ops_update_sis; + /* + * number of sample intervals that should be passed before next + * intervals tuning + */ + unsigned long next_intervals_tune_sis; /* for waiting until the execution of the kdamond_fn is started */ struct completion kdamond_started; /* for scheme quotas prioritization */ unsigned long *regions_score_histogram; + struct damon_call_control *call_control; + struct mutex call_control_lock; + + struct damos_walk_control *walk_control; + struct mutex walk_control_lock; + /* public: */ struct task_struct *kdamond; struct mutex kdamond_lock; @@ -702,6 +845,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damos_for_each_filter_safe(f, next, scheme) \ list_for_each_entry_safe(f, next, &(scheme)->filters, list) +#define damos_for_each_ops_filter(f, scheme) \ + list_for_each_entry(f, &(scheme)->ops_filters, list) + +#define damos_for_each_ops_filter_safe(f, next, scheme) \ + list_for_each_entry_safe(f, next, &(scheme)->ops_filters, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); @@ -725,8 +874,9 @@ void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, - bool matching); + bool matching, bool allow); void damos_add_filter(struct damos *s, struct damos_filter *f); +bool damos_filter_for_ops(enum damos_filter_type type); void damos_destroy_filter(struct damos_filter *f); struct damos_quota_goal *damos_new_quota_goal( @@ -779,6 +929,9 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); +int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); +int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); + int damon_set_region_biggest_system_ram_default(struct damon_target *t, unsigned long *start, unsigned long *end); diff --git a/include/linux/dax.h b/include/linux/dax.h index df41a0017b31..dcc9fcdf14e4 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -207,6 +207,11 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); +static inline bool dax_page_is_idle(struct page *page) +{ + return page && page_ref_count(page) == 0; +} + #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); @@ -220,6 +225,19 @@ static inline void dax_read_unlock(int id) { } #endif /* CONFIG_DAX */ + +#if !IS_ENABLED(CONFIG_FS_DAX) +static inline int __must_check dax_break_layout(struct inode *inode, + loff_t start, loff_t end, void (cb)(struct inode *)) +{ + return 0; +} + +static inline void dax_break_layout_final(struct inode *inode) +{ +} +#endif + bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, @@ -241,8 +259,18 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +void dax_delete_mapping_range(struct address_space *mapping, + loff_t 
start, loff_t end); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int __must_check dax_break_layout(struct inode *inode, loff_t start, + loff_t end, void (cb)(struct inode *)); +static inline int __must_check dax_break_layout_inode(struct inode *inode, + void (cb)(struct inode *)) +{ + return dax_break_layout(inode, 0, LLONG_MAX, cb); +} +void dax_break_layout_final(struct inode *inode); int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bff956f7b2b9..e9f07e37dd6f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -57,6 +57,7 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } +#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; @@ -68,16 +69,24 @@ extern const struct qstr dotdot_name; * large memory footprint increase). */ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 40 /* 192 bytes */ +# define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else -# define DNAME_INLINE_LEN 44 /* 128 bytes */ +# define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif +#define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) + +union shortname_store { + unsigned char string[DNAME_INLINE_LEN]; + unsigned long words[DNAME_INLINE_WORDS]; +}; + #define d_lock d_lockref.lock +#define d_iname d_shortname.string struct dentry { /* RCU lookup touched fields */ @@ -88,7 +97,7 @@ struct dentry { struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ - unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + union shortname_store d_shortname; /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ @@ -136,7 +145,8 @@ enum d_real_type { }; struct dentry_operations { - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -150,6 +160,8 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); } ____cacheline_aligned; /* @@ -161,65 +173,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) - -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. 
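As a hedged sketch of the ->d_revalidate() prototype change above: the directory inode and the looked-up name are now passed in explicitly, so an implementation no longer has to dig them out of the dentry. The examplefs_* names are invented; only the prototype comes from the header.

static int examplefs_d_revalidate(struct inode *dir, const struct qstr *name,
				  struct dentry *dentry, unsigned int flags)
{
	if (flags & LOOKUP_RCU)
		return -ECHILD;		/* punt to ref-walk, as many filesystems do */
	/* ... check @dentry against @name in directory @dir ... */
	return 1;			/* still valid */
}

static const struct dentry_operations examplefs_dentry_ops = {
	.d_revalidate	= examplefs_d_revalidate,
};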
Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(15) - -#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */ +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. 
*/ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; + #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(19) - -#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(26) - -#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(29) -#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* @@ -241,7 +247,6 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); -extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); @@ -508,12 +513,7 @@ static inline int simple_positive(const struct dentry *dentry) return d_really_is_positive(dentry) && !d_unhashed(dentry); } -extern int sysctl_vfs_cache_pressure; - -static inline unsigned long vfs_pressure_ratio(unsigned long val) -{ - return mult_frac(val, sysctl_vfs_cache_pressure, 100); -} +unsigned long vfs_pressure_ratio(unsigned long val); /** * d_inode - Get the actual inode of this dentry @@ -589,7 +589,7 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) struct name_snapshot { struct qstr name; - 
unsigned char inline_name[DNAME_INLINE_LEN]; + union shortname_store inline_name; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 59444b495d49..fa2568b4380d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -67,21 +67,23 @@ static const struct file_operations __fops = { \ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); -#if defined(CONFIG_DEBUG_FS) - -struct dentry *debugfs_lookup(const char *name, struct dentry *parent); - struct debugfs_short_fops { ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); loff_t (*llseek) (struct file *, loff_t, int); }; +#if defined(CONFIG_DEBUG_FS) + +struct dentry *debugfs_lookup(const char *name, struct dentry *parent); + struct dentry *debugfs_create_file_full(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct file_operations *fops); struct dentry *debugfs_create_file_short(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct debugfs_short_fops *fops); /** @@ -126,7 +128,15 @@ struct dentry *debugfs_create_file_short(const char *name, umode_t mode, const struct debugfs_short_fops *: debugfs_create_file_short, \ struct file_operations *: debugfs_create_file_full, \ struct debugfs_short_fops *: debugfs_create_file_short) \ - (name, mode, parent, data, fops) + (name, mode, parent, data, NULL, fops) + +#define debugfs_create_file_aux(name, mode, parent, data, aux, fops) \ + _Generic(fops, \ + const struct file_operations *: debugfs_create_file_full, \ + const struct debugfs_short_fops *: debugfs_create_file_short, \ + struct file_operations *: debugfs_create_file_full, \ + struct debugfs_short_fops *: debugfs_create_file_short) \ + (name, mode, parent, data, aux, fops) struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -153,6 +163,7 @@ void debugfs_remove(struct dentry *dentry); void debugfs_lookup_and_remove(const char *name, struct dentry *parent); const struct file_operations *debugfs_real_fops(const struct file *filp); +const void *debugfs_get_aux(const struct file *file); int debugfs_file_get(struct dentry *dentry); void debugfs_file_put(struct dentry *dentry); @@ -164,8 +175,7 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); -struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, const char *new_name); +int debugfs_change_name(struct dentry *dentry, const char *fmt, ...) 
__printf(2, 3); void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); @@ -259,6 +269,14 @@ static inline struct dentry *debugfs_lookup(const char *name, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_file_aux(const char *name, + umode_t mode, struct dentry *parent, + void *data, void *aux, + const void *fops) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const void *fops) @@ -312,6 +330,7 @@ static inline void debugfs_lookup_and_remove(const char *name, { } const struct file_operations *debugfs_real_fops(const struct file *filp); +void *debugfs_get_aux(const struct file *file); static inline int debugfs_file_get(struct dentry *dentry) { @@ -341,10 +360,10 @@ static inline ssize_t debugfs_attr_write_signed(struct file *file, return -ENODEV; } -static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, char *new_name) +static inline int __printf(2, 3) debugfs_change_name(struct dentry *dentry, + const char *fmt, ...) { - return ERR_PTR(-ENODEV); + return -ENODEV; } static inline void debugfs_create_u8(const char *name, umode_t mode, @@ -452,6 +471,11 @@ static inline ssize_t debugfs_read_file_str(struct file *file, #endif +#define debugfs_create_file_aux_num(name, mode, parent, data, n, fops) \ + debugfs_create_file_aux(name, mode, parent, data, \ + (void *)(unsigned long)n, fops) +#define debugfs_get_aux_num(f) (unsigned long)debugfs_get_aux(f) + /** * debugfs_create_xul - create a debugfs file that is used to read and write an * unsigned long value, formatted in hexadecimal diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6639f48dac36..800dcc360db2 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -29,25 +29,39 @@ struct task_delay_info { * XXX_delay contains the accumulated delay time in nanoseconds. 
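To illustrate the debugfs aux helpers added above, a sketch in which one file_operations is shared by several numbered files and the per-file index travels as the aux value; chan_stat_*, chan_dirs[] and nr_channels are invented names, while the debugfs_* calls are the ones declared in the hunk:

static ssize_t chan_stat_read(struct file *file, char __user *buf,
			      size_t count, loff_t *ppos)
{
	unsigned long chan = debugfs_get_aux_num(file);	/* index stored at create time */
	char tmp[32];
	int len = scnprintf(tmp, sizeof(tmp), "channel %lu\n", chan);

	return simple_read_from_buffer(buf, count, ppos, tmp, len);
}

static const struct file_operations chan_stat_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.read	= chan_stat_read,
};

/* at init time, one file per channel under its own directory: */
for (i = 0; i < nr_channels; i++)
	debugfs_create_file_aux_num("stat", 0444, chan_dirs[i], NULL, i,
				    &chan_stat_fops);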
*/ u64 blkio_start; + u64 blkio_delay_max; + u64 blkio_delay_min; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; + u64 swapin_delay_max; + u64 swapin_delay_min; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of swapin */ u64 freepages_start; + u64 freepages_delay_max; + u64 freepages_delay_min; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; + u64 thrashing_delay_max; + u64 thrashing_delay_min; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; + u64 compact_delay_max; + u64 compact_delay_min; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; + u64 wpcopy_delay_max; + u64 wpcopy_delay_min; u64 wpcopy_delay; /* wait for write-protect copy */ + u64 irq_delay_max; + u64 irq_delay_min; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8321f65897f3..bcc6d7b69470 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -299,6 +299,9 @@ struct target_type { #define dm_target_supports_mixed_zoned_model(type) (false) #endif +#define DM_TARGET_ATOMIC_WRITES 0x00000400 +#define dm_target_supports_atomic_writes(type) ((type)->features & DM_TARGET_ATOMIC_WRITES) + struct dm_target { struct dm_table *table; struct target_type *type; diff --git a/include/linux/device.h b/include/linux/device.h index 667cb6db9019..79e49fe494b7 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -26,9 +26,9 @@ #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/gfp.h> -#include <linux/overflow.h> #include <linux/device/bus.h> #include <linux/device/class.h> +#include <linux/device/devres.h> #include <linux/device/driver.h> #include <linux/cleanup.h> #include <asm/device.h> @@ -281,125 +281,24 @@ int __must_check device_create_bin_file(struct device *dev, void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -/* device resource management */ -typedef void (*dr_release_t)(struct device *dev, void *res); -typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); - -void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, - int nid, const char *name) __malloc; -#define devres_alloc(release, size, gfp) \ - __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) -#define devres_alloc_node(release, size, gfp, nid) \ - __devres_alloc_node(release, size, gfp, nid, #release) - -void devres_for_each_res(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data, - void (*fn)(struct device *, void *, void *), - void *data); -void devres_free(void *res); -void devres_add(struct device *dev, void *res); -void *devres_find(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); -void *devres_get(struct device *dev, void *new_res, - dr_match_t match, void *match_data); -void *devres_remove(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); -int devres_destroy(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); -int devres_release(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); - -/* devres group */ -void * __must_check devres_open_group(struct device *dev, void *id, gfp_t gfp); -void devres_close_group(struct device *dev, void *id); -void 
devres_remove_group(struct device *dev, void *id); -int devres_release_group(struct device *dev, void *id); - -/* managed devm_k.alloc/kfree for device drivers */ -void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2); -void *devm_krealloc(struct device *dev, void *ptr, size_t size, - gfp_t gfp) __must_check __realloc_size(3); -__printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, - const char *fmt, va_list ap) __malloc; -__printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, - const char *fmt, ...) __malloc; -static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) -{ - return devm_kmalloc(dev, size, gfp | __GFP_ZERO); -} -static inline void *devm_kmalloc_array(struct device *dev, - size_t n, size_t size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(n, size, &bytes))) - return NULL; - - return devm_kmalloc(dev, bytes, flags); -} -static inline void *devm_kcalloc(struct device *dev, - size_t n, size_t size, gfp_t flags) -{ - return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO); -} -static inline __realloc_size(3, 4) void * __must_check -devm_krealloc_array(struct device *dev, void *p, size_t new_n, size_t new_size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) - return NULL; - - return devm_krealloc(dev, p, bytes, flags); -} - -void devm_kfree(struct device *dev, const void *p); -char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; -const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); -void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) - __realloc_size(3); - -unsigned long devm_get_free_pages(struct device *dev, - gfp_t gfp_mask, unsigned int order); -void devm_free_pages(struct device *dev, unsigned long addr); - -#ifdef CONFIG_HAS_IOMEM -void __iomem *devm_ioremap_resource(struct device *dev, - const struct resource *res); -void __iomem *devm_ioremap_resource_wc(struct device *dev, - const struct resource *res); - -void __iomem *devm_of_iomap(struct device *dev, - struct device_node *node, int index, - resource_size_t *size); -#else - -static inline -void __iomem *devm_ioremap_resource(struct device *dev, - const struct resource *res) -{ - return ERR_PTR(-EINVAL); -} - -static inline -void __iomem *devm_ioremap_resource_wc(struct device *dev, - const struct resource *res) -{ - return ERR_PTR(-EINVAL); -} +/* allows to add/remove a custom action to devres stack */ +int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); +/** + * devm_remove_action() - removes previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Removes instance of @action previously added by devm_add_action(). + * Both action and data should match one of the existing entries. 
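A sketch of where the _nowarn variant declared above might be preferred over the WARN_ON-wrapping devm_remove_action() that follows: a driver undoing a managed action on a path where the action may already be gone. example_clk_off and priv are invented, and the reading of 0 as "found and removed" is an assumption, not something the header states.

static void example_clk_off(void *data)
{
	clk_disable_unprepare(data);
}

/* in probe: */
ret = devm_add_action_or_reset(dev, example_clk_off, priv->clk);
if (ret)
	return ret;

/* on a manual teardown path, quietly skip if it was already released: */
if (!devm_remove_action_nowarn(dev, example_clk_off, priv->clk))
	clk_disable_unprepare(priv->clk);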
+ */ static inline -void __iomem *devm_of_iomap(struct device *dev, - struct device_node *node, int index, - resource_size_t *size) +void devm_remove_action(struct device *dev, void (*action)(void *), void *data) { - return ERR_PTR(-EINVAL); + WARN_ON(devm_remove_action_nowarn(dev, action, data)); } -#endif - -/* allows to add/remove a custom action to devres stack */ -void devm_remove_action(struct device *dev, void (*action)(void *), void *data); void devm_release_action(struct device *dev, void (*action)(void *), void *data); int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); @@ -1009,6 +908,15 @@ static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags) return !!(dev->power.driver_flags & flags); } +static inline bool dev_pm_smart_suspend(struct device *dev) +{ +#ifdef CONFIG_PM_SLEEP + return dev->power.smart_suspend; +#else + return false; +#endif +} + static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); @@ -1074,18 +982,44 @@ void device_del(struct device *dev); DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) -int device_for_each_child(struct device *dev, void *data, - int (*fn)(struct device *dev, void *data)); -int device_for_each_child_reverse(struct device *dev, void *data, - int (*fn)(struct device *dev, void *data)); +int device_for_each_child(struct device *parent, void *data, + device_iter_t fn); +int device_for_each_child_reverse(struct device *parent, void *data, + device_iter_t fn); int device_for_each_child_reverse_from(struct device *parent, - struct device *from, const void *data, - int (*fn)(struct device *, const void *)); -struct device *device_find_child(struct device *dev, void *data, - int (*match)(struct device *dev, void *data)); -struct device *device_find_child_by_name(struct device *parent, - const char *name); -struct device *device_find_any_child(struct device *parent); + struct device *from, void *data, + device_iter_t fn); +struct device *device_find_child(struct device *parent, const void *data, + device_match_t match); +/** + * device_find_child_by_name - device iterator for locating a child device. + * @parent: parent struct device + * @name: name of the child device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a device that has the name @name. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +static inline struct device *device_find_child_by_name(struct device *parent, + const char *name) +{ + return device_find_child(parent, name, device_match_name); +} + +/** + * device_find_any_child - device iterator for locating a child device, if any. + * @parent: parent struct device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a child device, if any. + * + * NOTE: you will need to drop the reference with put_device() after use. 
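The by-name and any-child helpers above are now thin wrappers over device_find_child() plus a stock match function; a hedged sketch of the equivalent open-coded lookup ("port0" is an invented name):

struct device *child;

child = device_find_child(parent, "port0", device_match_name);
if (child) {
	/* ... use the child ... */
	put_device(child);	/* the lookup took a reference */
}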
+ */ +static inline struct device *device_find_any_child(struct device *parent) +{ + return device_find_child(parent, NULL, device_match_any); +} int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, @@ -1149,6 +1083,8 @@ int device_online(struct device *dev); void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void device_set_node(struct device *dev, struct fwnode_handle *fwnode); +int device_add_of_node(struct device *dev, struct device_node *of_node); +void device_remove_of_node(struct device *dev); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); static inline struct device_node *dev_of_node(struct device *dev) @@ -1226,7 +1162,7 @@ static inline void device_remove_group(struct device *dev, { const struct attribute_group *groups[] = { grp, NULL }; - return device_remove_groups(dev, groups); + device_remove_groups(dev, groups); } int __must_check devm_device_add_group(struct device *dev, diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index cdc4757217f9..f5a56efd2bd6 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -48,6 +48,7 @@ struct fwnode_handle; * will never get called until they do. * @remove: Called when a device removed from this bus. * @shutdown: Called at shut-down time to quiesce the device. + * @irq_get_affinity: Get IRQ affinity mask for the device on this bus. * * @online: Called to put the device back online (after offlining it). * @offline: Called to put the device offline for hot-removal. May fail. @@ -87,6 +88,8 @@ struct bus_type { void (*sync_state)(struct device *dev); void (*remove)(struct device *dev); void (*shutdown)(struct device *dev); + const struct cpumask *(*irq_get_affinity)(struct device *dev, + unsigned int irq_vec); int (*online)(struct device *dev); int (*offline)(struct device *dev); @@ -131,6 +134,7 @@ typedef int (*device_match_t)(struct device *dev, const void *data); /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); +int device_match_type(struct device *dev, const void *type); int device_match_of_node(struct device *dev, const void *np); int device_match_fwnode(struct device *dev, const void *fwnode); int device_match_devt(struct device *dev, const void *pdevt); @@ -138,9 +142,12 @@ int device_match_acpi_dev(struct device *dev, const void *adev); int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); +/* Device iterating function type for various driver core for_each APIs */ +typedef int (*device_iter_t)(struct device *dev, void *data); + /* iterator helpers for buses */ -int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, - int (*fn)(struct device *dev, void *data)); +int bus_for_each_dev(const struct bus_type *bus, struct device *start, + void *data, device_iter_t fn); struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, device_match_t match); /** diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 518c9c83d64b..65880e60c720 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -82,18 +82,16 @@ bool class_is_registered(const struct class *class); struct class_compat; struct class_compat 
*class_compat_register(const char *name); void class_compat_unregister(struct class_compat *cls); -int class_compat_create_link(struct class_compat *cls, struct device *dev, - struct device *device_link); -void class_compat_remove_link(struct class_compat *cls, struct device *dev, - struct device *device_link); +int class_compat_create_link(struct class_compat *cls, struct device *dev); +void class_compat_remove_link(struct class_compat *cls, struct device *dev); void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type); struct device *class_dev_iter_next(struct class_dev_iter *iter); void class_dev_iter_exit(struct class_dev_iter *iter); -int class_for_each_device(const struct class *class, const struct device *start, void *data, - int (*fn)(struct device *dev, void *data)); +int class_for_each_device(const struct class *class, const struct device *start, + void *data, device_iter_t fn); struct device *class_find_device(const struct class *class, const struct device *start, const void *data, device_match_t match); @@ -195,7 +193,7 @@ static inline int __must_check class_create_file(const struct class *class, static inline void class_remove_file(const struct class *class, const struct class_attribute *attr) { - return class_remove_file_ns(class, attr, NULL); + class_remove_file_ns(class, attr, NULL); } /* Simple class attribute that is just a static string */ diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h new file mode 100644 index 000000000000..9b49f9915850 --- /dev/null +++ b/include/linux/device/devres.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DEVICE_DEVRES_H_ +#define _DEVICE_DEVRES_H_ + +#include <linux/err.h> +#include <linux/gfp_types.h> +#include <linux/numa.h> +#include <linux/overflow.h> +#include <linux/stdarg.h> +#include <linux/types.h> + +struct device; +struct device_node; +struct resource; + +/* device resource management */ +typedef void (*dr_release_t)(struct device *dev, void *res); +typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); + +void * __malloc +__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name); +#define devres_alloc(release, size, gfp) \ + __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) +#define devres_alloc_node(release, size, gfp, nid) \ + __devres_alloc_node(release, size, gfp, nid, #release) + +void devres_for_each_res(struct device *dev, dr_release_t release, + dr_match_t match, void *match_data, + void (*fn)(struct device *, void *, void *), + void *data); +void devres_free(void *res); +void devres_add(struct device *dev, void *res); +void *devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); +void *devres_get(struct device *dev, void *new_res, dr_match_t match, void *match_data); +void *devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); +int devres_destroy(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); +int devres_release(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); + +/* devres group */ +void * __must_check devres_open_group(struct device *dev, void *id, gfp_t gfp); +void devres_close_group(struct device *dev, void *id); +void devres_remove_group(struct device *dev, void *id); +int devres_release_group(struct device *dev, void *id); + +/* managed devm_k.alloc/kfree for device drivers */ +void 
* __alloc_size(2) +devm_kmalloc(struct device *dev, size_t size, gfp_t gfp); +void * __must_check __realloc_size(3) +devm_krealloc(struct device *dev, void *ptr, size_t size, gfp_t gfp); +static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) +{ + return devm_kmalloc(dev, size, gfp | __GFP_ZERO); +} +static inline void *devm_kmalloc_array(struct device *dev, size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + return devm_kmalloc(dev, bytes, flags); +} +static inline void *devm_kcalloc(struct device *dev, size_t n, size_t size, gfp_t flags) +{ + return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO); +} +static inline __realloc_size(3, 4) void * __must_check +devm_krealloc_array(struct device *dev, void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return devm_krealloc(dev, p, bytes, flags); +} + +void devm_kfree(struct device *dev, const void *p); + +void * __realloc_size(3) +devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); +static inline void *devm_kmemdup_array(struct device *dev, const void *src, + size_t n, size_t size, gfp_t flags) +{ + return devm_kmemdup(dev, src, size_mul(size, n), flags); +} + +char * __malloc +devm_kstrdup(struct device *dev, const char *s, gfp_t gfp); +const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); +char * __printf(3, 0) __malloc +devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap); +char * __printf(3, 4) __malloc +devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...); + +unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); +void devm_free_pages(struct device *dev, unsigned long addr); + +#ifdef CONFIG_HAS_IOMEM + +void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res); +void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res); + +void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, + resource_size_t *size); +#else + +static inline +void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res) +{ + return IOMEM_ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res) +{ + return IOMEM_ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, + resource_size_t *size) +{ + return IOMEM_ERR_PTR(-EINVAL); +} + +#endif + +#endif /* _DEVICE_DEVRES_H_ */ diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5c04b8e3833b..cd8e0f0a634b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -154,7 +154,7 @@ void driver_remove_file(const struct device_driver *driver, int driver_set_override(struct device *dev, const char **override, const char *s, size_t len); int __must_check driver_for_each_device(struct device_driver *drv, struct device *start, - void *data, int (*fn)(struct device *dev, void *)); + void *data, device_iter_t fn); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, device_match_t match); diff --git a/include/linux/device/faux.h b/include/linux/device/faux.h new file mode 100644 index 000000000000..9f43c0e46aa4 --- /dev/null +++ b/include/linux/device/faux.h @@ -0,0 +1,69 
@@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#ifndef _FAUX_DEVICE_H_ +#define _FAUX_DEVICE_H_ + +#include <linux/container_of.h> +#include <linux/device.h> + +/** + * struct faux_device - a "faux" device + * @dev: internal struct device of the object + * + * A simple faux device that can be created/destroyed. To be used when a + * driver only needs to have a device to "hang" something off. This can be + * used for downloading firmware or other basic tasks. Use this instead of + * a struct platform_device if the device has no resources assigned to + * it at all. + */ +struct faux_device { + struct device dev; +}; +#define to_faux_device(x) container_of_const((x), struct faux_device, dev) + +/** + * struct faux_device_ops - a set of callbacks for a struct faux_device + * @probe: called when a faux device is probed by the driver core + * before the device is fully bound to the internal faux bus + * code. If probe succeeds, return 0, otherwise return a + * negative error number to stop the probe sequence from + * succeeding. + * @remove: called when a faux device is removed from the system + * + * Both @probe and @remove are optional, if not needed, set to NULL. + */ +struct faux_device_ops { + int (*probe)(struct faux_device *faux_dev); + void (*remove)(struct faux_device *faux_dev); +}; + +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops); +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups); +void faux_device_destroy(struct faux_device *faux_dev); + +static inline void *faux_device_get_drvdata(const struct faux_device *faux_dev) +{ + return dev_get_drvdata(&faux_dev->dev); +} + +static inline void faux_device_set_drvdata(struct faux_device *faux_dev, void *data) +{ + dev_set_drvdata(&faux_dev->dev, data); +} + +#endif /* _FAUX_DEVICE_H_ */ diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index d7e30d4f7503..f3bc0bcd7098 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -78,14 +78,18 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map) #define phys_to_dma_unencrypted phys_to_dma #endif #else -static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, - phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { if (dev->dma_range_map) return translate_phys_to_dma(dev, paddr); return paddr; } +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) +{ + return dma_addr_unencrypted(__phys_to_dma(dev, paddr)); +} /* * If memory encryption is supported, phys_to_dma will set the memory encryption * bit in the DMA address, and dma_to_phys will clear it. 
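A sketch of the new faux device API from include/linux/device/faux.h above; all example_* names are invented and the NULL-on-failure return is an assumption rather than something the header spells out:

static int example_faux_probe(struct faux_device *faux_dev)
{
	dev_info(&faux_dev->dev, "bound\n");
	return 0;
}

static const struct faux_device_ops example_faux_ops = {
	.probe = example_faux_probe,	/* .remove left NULL, both are optional */
};

static struct faux_device *example_faux;

static int __init example_init(void)
{
	example_faux = faux_device_create("example", NULL, &example_faux_ops);
	return example_faux ? 0 : -ENODEV;
}

static void __exit example_exit(void)
{
	faux_device_destroy(example_faux);
}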
@@ -94,19 +98,20 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return __sme_set(phys_to_dma_unencrypted(dev, paddr)); + return dma_addr_encrypted(__phys_to_dma(dev, paddr)); } static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { phys_addr_t paddr; + dma_addr = dma_addr_canonical(dma_addr); if (dev->dma_range_map) paddr = translate_dma_to_phys(dev, dma_addr); else paddr = dma_addr; - return __sme_clr(paddr); + return paddr; } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c433..85ab710ec0e7 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,10 +629,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 2dea217629d0..5d43881e6fb7 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -138,8 +138,7 @@ int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, - void (*cleanup)(void *data, dma_addr_t desc_dma), - bool skip_fdq); + void (*cleanup)(void *data, dma_addr_t desc_dma)); int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 346251bf1026..bb146c5ac3e4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -839,7 +839,6 @@ struct dma_filter { * The function takes a buffer of size buf_len. The callback function will * be called after period_len bytes have been transferred. * @device_prep_interleaved_dma: Transfer expression in a generic way. 
- * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address * @device_caps: May be used to override the generic DMA slave capabilities * with per-channel specific ones * @device_config: Pushes a new configuration to a channel, return 0 or an error @@ -942,9 +941,6 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)( - struct dma_chan *chan, dma_addr_t dst, u64 data, - unsigned long flags); void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps); int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); @@ -1639,14 +1635,14 @@ static inline struct dma_chan { struct dma_chan *chan; - chan = dma_request_slave_channel(dev, name); - if (chan) + chan = dma_request_chan(dev, name); + if (!IS_ERR(chan)) return chan; if (!fn || !fn_param) return NULL; - return __dma_request_channel(&mask, fn, fn_param, NULL); + return dma_request_channel(mask, fn, fn_param); } static inline char * diff --git a/include/linux/edac.h b/include/linux/edac.h index b4ee8961e623..451f9c152c99 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -661,4 +661,219 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, return mci->dimms[index]; } + +#define EDAC_FEAT_NAME_LEN 128 + +/* RAS feature type */ +enum edac_dev_feat { + RAS_FEAT_SCRUB, + RAS_FEAT_ECS, + RAS_FEAT_MEM_REPAIR, + RAS_FEAT_MAX +}; + +/** + * struct edac_scrub_ops - scrub device operations (all elements optional) + * @read_addr: read base address of scrubbing range. + * @read_size: read offset of scrubbing range. + * @write_addr: set base address of the scrubbing range. + * @write_size: set offset of the scrubbing range. + * @get_enabled_bg: check if currently performing background scrub. + * @set_enabled_bg: start or stop a bg-scrub. + * @get_min_cycle: get minimum supported scrub cycle duration in seconds. + * @get_max_cycle: get maximum supported scrub cycle duration in seconds. + * @get_cycle_duration: get current scrub cycle duration in seconds. + * @set_cycle_duration: set current scrub cycle duration in seconds. + */ +struct edac_scrub_ops { + int (*read_addr)(struct device *dev, void *drv_data, u64 *base); + int (*read_size)(struct device *dev, void *drv_data, u64 *size); + int (*write_addr)(struct device *dev, void *drv_data, u64 base); + int (*write_size)(struct device *dev, void *drv_data, u64 size); + int (*get_enabled_bg)(struct device *dev, void *drv_data, bool *enable); + int (*set_enabled_bg)(struct device *dev, void *drv_data, bool enable); + int (*get_min_cycle)(struct device *dev, void *drv_data, u32 *min); + int (*get_max_cycle)(struct device *dev, void *drv_data, u32 *max); + int (*get_cycle_duration)(struct device *dev, void *drv_data, u32 *cycle); + int (*set_cycle_duration)(struct device *dev, void *drv_data, u32 cycle); +}; + +#if IS_ENABLED(CONFIG_EDAC_SCRUB) +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_SCRUB */ + +/** + * struct edac_ecs_ops - ECS device operations (all elements optional) + * @get_log_entry_type: read the log entry type value. + * @set_log_entry_type: set the log entry type value. 
+ * @get_mode: read the mode value. + * @set_mode: set the mode value. + * @reset: reset the ECS counter. + * @get_threshold: read the threshold count per gigabits of memory cells. + * @set_threshold: set the threshold count per gigabits of memory cells. + */ +struct edac_ecs_ops { + int (*get_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_mode)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_mode)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*reset)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_threshold)(struct device *dev, void *drv_data, int fru_id, u32 *threshold); + int (*set_threshold)(struct device *dev, void *drv_data, int fru_id, u32 threshold); +}; + +struct edac_ecs_ex_info { + u16 num_media_frus; +}; + +#if IS_ENABLED(CONFIG_EDAC_ECS) +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus); +#else +static inline int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_ECS */ + +enum edac_mem_repair_type { + EDAC_REPAIR_MAX +}; + +enum edac_mem_repair_cmd { + EDAC_DO_MEM_REPAIR = 1, +}; + +/** + * struct edac_mem_repair_ops - memory repair operations + * (all elements are optional except do_repair, set_hpa/set_dpa) + * @get_repair_type: get the memory repair type, listed in + * enum edac_mem_repair_function. + * @get_persist_mode: get the current persist mode. + * false - Soft repair type (temporary repair). + * true - Hard memory repair type (permanent repair). + * @set_persist_mode: set the persist mode of the memory repair instance. + * @get_repair_safe_when_in_use: get whether memory media is accessible and + * data is retained during repair operation. + * @get_hpa: get current host physical address (HPA) of memory to repair. + * @set_hpa: set host physical address (HPA) of memory to repair. + * @get_min_hpa: get the minimum supported host physical address (HPA). + * @get_max_hpa: get the maximum supported host physical address (HPA). + * @get_dpa: get current device physical address (DPA) of memory to repair. + * @set_dpa: set device physical address (DPA) of memory to repair. + * In some states of system configuration (e.g. before address decoders + * have been configured), memory devices (e.g. CXL) may not have an active + * mapping in the host physical address map. As such, the memory + * to repair must be identified by a device specific physical addressing + * scheme using a device physical address(DPA). The DPA and other control + * attributes to use for the repair operations will be presented in related + * error records. + * @get_min_dpa: get the minimum supported device physical address (DPA). + * @get_max_dpa: get the maximum supported device physical address (DPA). + * @get_nibble_mask: get current nibble mask of memory to repair. + * @set_nibble_mask: set nibble mask of memory to repair. + * @get_bank_group: get current bank group of memory to repair. + * @set_bank_group: set bank group of memory to repair. + * @get_bank: get current bank of memory to repair. + * @set_bank: set bank of memory to repair. + * @get_rank: get current rank of memory to repair. + * @set_rank: set rank of memory to repair. + * @get_row: get current row of memory to repair. + * @set_row: set row of memory to repair. 
+ * @get_column: get current column of memory to repair. + * @set_column: set column of memory to repair. + * @get_channel: get current channel of memory to repair. + * @set_channel: set channel of memory to repair. + * @get_sub_channel: get current subchannel of memory to repair. + * @set_sub_channel: set subchannel of memory to repair. + * @do_repair: Issue memory repair operation for the HPA/DPA and + * other control attributes set for the memory to repair. + * + * All elements are optional except do_repair and at least one of set_hpa/set_dpa. + */ +struct edac_mem_repair_ops { + int (*get_repair_type)(struct device *dev, void *drv_data, const char **type); + int (*get_persist_mode)(struct device *dev, void *drv_data, bool *persist); + int (*set_persist_mode)(struct device *dev, void *drv_data, bool persist); + int (*get_repair_safe_when_in_use)(struct device *dev, void *drv_data, bool *safe); + int (*get_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*set_hpa)(struct device *dev, void *drv_data, u64 hpa); + int (*get_min_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_max_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*set_dpa)(struct device *dev, void *drv_data, u64 dpa); + int (*get_min_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val); + int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val); + int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank_group)(struct device *dev, void *drv_data, u32 val); + int (*get_bank)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank)(struct device *dev, void *drv_data, u32 val); + int (*get_rank)(struct device *dev, void *drv_data, u32 *val); + int (*set_rank)(struct device *dev, void *drv_data, u32 val); + int (*get_row)(struct device *dev, void *drv_data, u32 *val); + int (*set_row)(struct device *dev, void *drv_data, u32 val); + int (*get_column)(struct device *dev, void *drv_data, u32 *val); + int (*set_column)(struct device *dev, void *drv_data, u32 val); + int (*get_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_channel)(struct device *dev, void *drv_data, u32 val); + int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val); + int (*do_repair)(struct device *dev, void *drv_data, u32 val); +}; + +#if IS_ENABLED(CONFIG_EDAC_MEM_REPAIR) +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_MEM_REPAIR */ + +/* EDAC device feature information structure */ +struct edac_dev_data { + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + const struct edac_mem_repair_ops *mem_repair_ops; + }; + u8 instance; + void *private; +}; + +struct edac_dev_feat_ctx { + struct device dev; + void *private; + struct edac_dev_data *scrub; + struct edac_dev_data ecs; + struct edac_dev_data *mem_repair; +}; + +struct edac_dev_feature { + enum edac_dev_feat ft_type; + u8 instance; + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + const struct 
edac_mem_repair_ops *mem_repair_ops; + }; + void *ctx; + struct edac_ecs_ex_info ecs_info; +}; + +int edac_dev_register(struct device *parent, char *dev_name, + void *parent_pvt_data, int num_features, + const struct edac_dev_feature *ras_features); #endif /* _LINUX_EDAC_H_ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 8bcd629ee250..7d63d1d75f22 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -114,22 +114,22 @@ typedef struct { #define EFI_MAX_MEMORY_TYPE 16 /* Attribute values: */ -#define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ -#define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */ -#define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */ -#define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */ -#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */ -#define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ -#define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ -#define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ -#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ -#define EFI_MEMORY_MORE_RELIABLE \ - ((u64)0x0000000000010000ULL) /* higher reliability */ -#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ -#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ -#define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */ +#define EFI_MEMORY_UC BIT_ULL(0) /* uncached */ +#define EFI_MEMORY_WC BIT_ULL(1) /* write-coalescing */ +#define EFI_MEMORY_WT BIT_ULL(2) /* write-through */ +#define EFI_MEMORY_WB BIT_ULL(3) /* write-back */ +#define EFI_MEMORY_UCE BIT_ULL(4) /* uncached, exported */ +#define EFI_MEMORY_WP BIT_ULL(12) /* write-protect */ +#define EFI_MEMORY_RP BIT_ULL(13) /* read-protect */ +#define EFI_MEMORY_XP BIT_ULL(14) /* execute-protect */ +#define EFI_MEMORY_NV BIT_ULL(15) /* non-volatile */ +#define EFI_MEMORY_MORE_RELIABLE BIT_ULL(16) /* higher reliability */ +#define EFI_MEMORY_RO BIT_ULL(17) /* read-only */ +#define EFI_MEMORY_SP BIT_ULL(18) /* soft reserved */ +#define EFI_MEMORY_CPU_CRYPTO BIT_ULL(19) /* supports encryption */ #define EFI_MEMORY_HOT_PLUGGABLE BIT_ULL(20) /* supports unplugging at runtime */ -#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ +#define EFI_MEMORY_RUNTIME BIT_ULL(63) /* range requires runtime mapping */ + #define EFI_MEMORY_DESCRIPTOR_VERSION 1 #define EFI_PAGE_SHIFT 12 @@ -364,7 +364,6 @@ void efi_native_runtime_setup(void); #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) @@ -374,7 +373,6 @@ void efi_native_runtime_setup(void); #define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 
0x1a, 0x4e, 0x4a, 0x7f, 0x1c) #define EFI_DEVICE_PATH_FROM_TEXT_PROTOCOL_GUID EFI_GUID(0x05c99a21, 0xc70f, 0x4ad2, 0x8a, 0x5f, 0x35, 0xdf, 0x33, 0x43, 0xf5, 0x1e) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) -#define EFI_UGA_PROTOCOL_GUID EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) #define EFI_FILE_INFO_ID EFI_GUID(0x09576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_SYSTEM_RESOURCE_TABLE_GUID EFI_GUID(0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) @@ -1287,8 +1285,6 @@ struct linux_efi_memreserve { void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); -char *efi_systab_show_arch(char *str); - /* * The LINUX_EFI_MOK_VARIABLE_TABLE_GUID config table can be provided * to the kernel by an EFI boot loader. The table contains a packed diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 752e0b297582..d8eabbf86a5b 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -167,18 +167,19 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table); + struct em_perf_table *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts); + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); -void em_table_free(struct em_perf_table __rcu *table); +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table *table); int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int nr_states); int em_dev_update_chip_binning(struct device *dev); int em_update_performance_limits(struct em_perf_domain *pd, unsigned long freq_min_khz, unsigned long freq_max_khz); +void em_rebuild_sched_domains(void); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM @@ -239,9 +240,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i; -#ifdef CONFIG_SCHED_DEBUG WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); -#endif if (!sum_util) return 0; @@ -345,8 +344,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts) + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { return -EINVAL; } @@ -372,14 +371,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return 0; } static inline -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd) { return NULL; } -static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline void em_table_free(struct em_perf_table *table) {} static inline int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table) + struct em_perf_table *new_table) { return -EINVAL; } @@ -404,6 +403,7 @@ int 
em_update_performance_limits(struct em_perf_domain *pd, { return -EINVAL; } +static inline void em_rebuild_sched_domains(void) {} #endif #endif diff --git a/include/linux/err.h b/include/linux/err.h index a4dacd745fcf..1d60aa86db53 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -44,6 +44,9 @@ static inline void * __must_check ERR_PTR(long error) /* Return the pointer in the percpu address space. */ #define ERR_PTR_PCPU(error) ((void __percpu *)(unsigned long)ERR_PTR(error)) +/* Cast an error pointer to __iomem. */ +#define IOMEM_ERR_PTR(error) (__force void __iomem *)ERR_PTR(error) + /** * PTR_ERR - Extract the error code from an error pointer. * @ptr: An error pointer. diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index ecf203f01034..9a1eacf35d37 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -81,7 +81,7 @@ static const u8 eth_ipv6_mcast_addr_base[ETH_ALEN] __aligned(2) = * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per + * Return: true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. * * Please note: addr must be aligned to u16. @@ -104,7 +104,7 @@ static inline bool is_link_local_ether_addr(const u8 *addr) * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is all zeroes. + * Return: true if the address is all zeroes. * * Please note: addr must be aligned to u16. */ @@ -123,7 +123,7 @@ static inline bool is_zero_ether_addr(const u8 *addr) * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a multicast address. + * Return: true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) @@ -157,7 +157,7 @@ static inline bool is_multicast_ether_addr_64bits(const u8 *addr) * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a local address. + * Return: true if the address is a local address. */ static inline bool is_local_ether_addr(const u8 *addr) { @@ -168,7 +168,7 @@ static inline bool is_local_ether_addr(const u8 *addr) * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is the broadcast address. + * Return: true if the address is the broadcast address. * * Please note: addr must be aligned to u16. */ @@ -183,7 +183,7 @@ static inline bool is_broadcast_ether_addr(const u8 *addr) * is_unicast_ether_addr - Determine if the Ethernet address is unicast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a unicast address. + * Return: true if the address is a unicast address. 
*/ static inline bool is_unicast_ether_addr(const u8 *addr) { @@ -197,7 +197,7 @@ static inline bool is_unicast_ether_addr(const u8 *addr) * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * - * Return true if the address is valid. + * Return: true if the address is valid. * * Please note: addr must be aligned to u16. */ @@ -214,7 +214,7 @@ static inline bool is_valid_ether_addr(const u8 *addr) * * Check that the value from the Ethertype/length field is a valid Ethertype. * - * Return true if the valid is an 802.3 supported Ethertype. + * Return: true if the valid is an 802.3 supported Ethertype. */ static inline bool eth_proto_is_802_3(__be16 proto) { @@ -458,7 +458,7 @@ static inline bool ether_addr_is_ip_mcast(const u8 *addr) * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return a u64 value of the address + * Return: a u64 value of the address */ static inline u64 ether_addr_to_u64(const u8 *addr) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b0ed740ca749..8210ece94fa6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -78,6 +78,9 @@ enum { * @cqe_size: Size of TX/RX completion queue event * @tx_push_buf_len: Size of TX push buffer * @tx_push_buf_max_len: Maximum allowed size of TX push buffer + * @hds_thresh: Packet size threshold for header data split (HDS) + * @hds_thresh_max: Maximum supported setting for @hds_threshold + * */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -87,6 +90,8 @@ struct kernel_ethtool_ringparam { u32 cqe_size; u32 tx_push_buf_len; u32 tx_push_buf_max_len; + u32 hds_thresh; + u32 hds_thresh_max; }; /** @@ -97,6 +102,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split + * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -105,6 +111,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), + ETHTOOL_RING_USE_HDS_THRS = BIT(6), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -203,6 +210,14 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); +struct link_mode_info { + int speed; + u8 lanes; + u8 duplex; +}; + +extern const struct link_mode_info link_mode_params[]; + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -257,7 +272,7 @@ struct ethtool_link_ksettings { * @mode : one of the ETHTOOL_LINK_MODE_*_BIT * (not atomic, no bound checking) * - * Returns true/false. + * Returns: true/false. */ #define ethtool_link_ksettings_test_link_mode(ptr, name, mode) \ test_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) @@ -552,6 +567,12 @@ struct ethtool_rmon_stats { /** * struct ethtool_ts_stats - HW timestamping statistics * @pkts: Number of packets successfully timestamped by the hardware. 
+ * @onestep_pkts_unconfirmed: Number of PTP packets with one-step TX + * timestamping that were sent, but for which the + * device offers no confirmation whether they made + * it onto the wire and the timestamp was inserted + * in the originTimestamp or correctionField, or + * not. * @lost: Number of hardware timestamping requests where the timestamping * information from the hardware never arrived for submission with * the skb. @@ -564,6 +585,7 @@ struct ethtool_rmon_stats { struct ethtool_ts_stats { struct_group(tx_stats, u64 pkts; + u64 onestep_pkts_unconfirmed; u64 lost; u64 err; ); @@ -734,6 +756,7 @@ struct ethtool_rxfh_param { * @cmd: command number = %ETHTOOL_GET_TS_INFO * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none + * @phc_qualifier: qualifier of the associated PHC * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values */ @@ -741,19 +764,19 @@ struct kernel_ethtool_ts_info { u32 cmd; u32 so_timestamping; int phc_index; + enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_tx_types tx_types; enum hwtstamp_rx_filters rx_filters; }; /** * struct ethtool_ops - optional netdev operations + * @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*. * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @cap_rss_ctx_supported: indicates if the driver supports RSS * contexts via legacy API, drivers implementing @create_rxfh_context * do not have to set this bit. - * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor - * RSS. * @rxfh_per_ctx_key: device supports setting different RSS key for each * additional context. Netlink API should report hfunc, key, and input_xfrm * for every context, not just context 0. @@ -772,6 +795,7 @@ struct kernel_ethtool_ts_info { * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. + * @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier. * @get_drvinfo: Report driver/device information. Modern drivers no * longer have to implement this callback. Most fields are * correctly filled in by the core using system information, or @@ -978,9 +1002,9 @@ struct kernel_ethtool_ts_info { * of the generic netdev features interface. 
*/ struct ethtool_ops { + u32 supported_input_xfrm:8; u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; - u32 cap_rss_sym_xor_supported:1; u32 rxfh_per_ctx_key:1; u32 cap_rss_rxnfc_adds:1; u32 rxfh_indir_space; @@ -989,6 +1013,7 @@ struct ethtool_ops { u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; + u32 supported_hwtstamp_qualifiers; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); @@ -1222,7 +1247,7 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, * @dev: pointer to net_device structure * @vclock_index: pointer to pointer of vclock index * - * Return number of phc vclocks + * Return: number of phc vclocks */ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); @@ -1276,7 +1301,7 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. * @dev: pointer to net_device structure * @info: buffer to hold the result - * Returns zero on success, non-zero otherwise. + * Returns: zero on success, non-zero otherwise. */ int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info); @@ -1322,24 +1347,4 @@ struct ethtool_forced_speed_map { void ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); - -/* C33 PSE extended state and substate. */ -struct ethtool_c33_pse_ext_state_info { - enum ethtool_c33_pse_ext_state c33_pse_ext_state; - union { - enum ethtool_c33_pse_ext_substate_error_condition error_condition; - enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; - enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; - enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; - enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; - enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; - enum ethtool_c33_pse_ext_substate_short_detected short_detected; - u32 __c33_pse_ext_substate; - }; -}; - -struct ethtool_c33_pse_pw_limit_range { - u32 min; - u32 max; -}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 0c0d00fcd131..ccb478eb174b 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); +/* Copy ready events to userspace */ +int epoll_sendevents(struct file *file, struct epoll_event __user *events, + int maxevents); + /* * This is called from inside fs/file_table.c:__fput() to unlink files * from the eventpoll interface. We need to have this facility to cleanup diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 64130ae19690..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/moduleloader.h> +#include <linux/cleanup.h> #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -65,6 +66,37 @@ enum execmem_range_flags { * Architectures that use EXECMEM_ROX_CACHE must implement this. 
*/ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); + +/** + * execmem_make_temp_rw - temporarily remap region with read-write + * permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Remaps a part of the cached large page in the ROX cache in the range + * [@ptr, @ptr + @size) as writable and not executable. The caller must + * have exclusive ownership of this range and ensure nothing will try to + * execute code in this range. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_make_temp_rw(void *ptr, size_t size); + +/** + * execmem_restore_rox - restore read-only-execute permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Restores read-only-execute permissions on a range [@ptr, @ptr + @size) + * after it was temporarily remapped as writable. Relies on architecture + * implementation of set_memory_rox() to restore mapping using large pages. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_restore_rox(void *ptr, size_t size); +#else +static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } +static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif /** @@ -139,6 +171,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/export.h b/include/linux/export.h index 2633df4d31e6..f35d03b4113b 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -24,11 +24,17 @@ .long sym #endif -#define ___EXPORT_SYMBOL(sym, license, ns) \ +/* + * LLVM integrated assembler can merge adjacent string literals (like + * C and GNU-as) passed to '.ascii', but not to '.asciz' and chokes on: + * + * .asciz "MODULE_" "kvm" ; + */ +#define ___EXPORT_SYMBOL(sym, license, ns...) \ .section ".export_symbol","a" ASM_NL \ __export_symbol_##sym: ASM_NL \ .asciz license ASM_NL \ - .asciz ns ASM_NL \ + .ascii ns "\0" ASM_NL \ __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous @@ -52,9 +58,24 @@ #else +#ifdef CONFIG_GENDWARFKSYMS +/* + * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging + * information for all exported symbols, including those defined in + * different TUs, by adding a __gendwarfksyms_ptr_<symbol> pointer + * that's discarded during the final link.
+ */ +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; +#else +#define __GENDWARFKSYMS_EXPORT(sym) +#endif + #define __EXPORT_SYMBOL(sym, license, ns) \ extern typeof(sym) sym; \ __ADDRESSABLE(sym) \ + __GENDWARFKSYMS_EXPORT(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) #endif @@ -70,4 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) +#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) + #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 4cc8801e50e3..25c4a5afbd44 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -3,6 +3,7 @@ #define LINUX_EXPORTFS_H 1 #include <linux/types.h> +#include <linux/path.h> struct dentry; struct iattr; @@ -156,6 +157,17 @@ struct fid { }; }; +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ @@ -225,6 +237,12 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * permission: + * Allow filesystems to specify a custom permission function. + * + * open: + * Allow filesystems to specify a custom open function. + * * commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * @@ -251,6 +269,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); + struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ @@ -259,10 +279,22 @@ struct export_operations { atomic attribute updates */ #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ -#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ +#define EXPORT_OP_NOLOCKS (0x40) /* no file locking support */ unsigned long flags; }; +/** + * exportfs_cannot_lock() - check if export implements file locking + * @export_ops: the nfs export operations to check + * + * Returns true if the export does not support file locking. + */ +static inline bool +exportfs_cannot_lock(const struct export_operations *export_ops) +{ + return export_ops->flags & EXPORT_OP_NOLOCKS; +} + extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); @@ -282,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { + if (!nop) + return false; + /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. 
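As a rough illustration of the EXPORT_SYMBOL_GPL_FOR_MODULES() macro introduced in the include/linux/export.h hunk above, a driver could restrict an export to a named module along these lines; the symbol my_hyp_hook is hypothetical, and "kvm" simply follows the example string quoted in the export.h comment:

#include <linux/export.h>

int my_hyp_hook(void)			/* hypothetical exported symbol */
{
	return 0;
}
/* only a module named "kvm" is allowed to resolve this export */
EXPORT_SYMBOL_GPL_FOR_MODULES(my_hyp_hook, "kvm");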
@@ -290,6 +325,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, return exportfs_can_encode_fid(nop); /* + * If a connectable file handle was requested, we need to make sure that + * filesystem can also decode connected file handles. + */ + if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent) + return false; + + /* * If a decodeable file handle was requested, we need to make sure that * filesystem can also decode file handles. */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c24f8bc01045..5206d63b3386 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -78,6 +78,7 @@ enum stop_cp_reason { STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, + STOP_CP_REASON_CORRUPTED_FREE_BITMAP, STOP_CP_REASON_MAX, }; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 89ff45bd6f01..3c817dc6292e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ #define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT) /* * fanotify_init() flags that require CAP_SYS_ADMIN. @@ -38,7 +38,8 @@ FAN_REPORT_PIDFD | \ FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS) + FAN_UNLIMITED_MARKS | \ + FAN_REPORT_MNT) /* * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. @@ -58,7 +59,7 @@ #define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ - FAN_MARK_FILESYSTEM) + FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS) #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ FAN_MARK_FLUSH) @@ -89,6 +90,16 @@ #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ FAN_RENAME) +/* Content events can be used to inspect file content */ +#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \ + FAN_ACCESS_PERM) +/* Pre-content events can be used to fill file content */ +#define FANOTIFY_PRE_CONTENT_EVENTS (FAN_PRE_ACCESS) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FANOTIFY_CONTENT_PERM_EVENTS | \ + FANOTIFY_PRE_CONTENT_EVENTS) + /* Events that can be reported with event->fd */ #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) @@ -99,14 +110,13 @@ /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) +#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH) + /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ FANOTIFY_INODE_EVENTS | \ - FANOTIFY_ERROR_EVENTS) - -/* Events that require a permission response from user */ -#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ - FAN_OPEN_EXEC_PERM) + FANOTIFY_ERROR_EVENTS | \ + FANOTIFY_MOUNT_EVENTS) /* Extra flags that may be reported with event or control handling of events */ #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) @@ -126,7 +136,9 @@ /* These masks check for invalid bits in permission responses. 
*/ #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY) #define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO) -#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS) +#define FANOTIFY_RESPONSE_VALID_MASK \ + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \ + (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT)) /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS diff --git a/include/linux/fb.h b/include/linux/fb.h index 5ba187e08cf7..cd653862ab99 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -225,6 +225,7 @@ struct fb_deferred_io { int open_count; /* number of opened files; protected by fb_info lock */ struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ + struct address_space *mapping; /* page cache object for fb device */ /* callback */ struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index c50882f19235..966092ffa89a 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -5,12 +5,18 @@ #include <uapi/linux/fiemap.h> #include <linux/fs.h> +/** + * struct fiemap_extent_info - fiemap request to a filesystem + * @fi_flags: Flags as passed from user + * @fi_extents_mapped: Number of mapped extents + * @fi_extents_max: Size of fiemap_extent array + * @fi_extents_start: Start of fiemap_extent array + */ struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent __user *fi_extents_start; /* Start of - fiemap_extent array */ + unsigned int fi_flags; + unsigned int fi_extents_mapped; + unsigned int fi_extents_max; + struct fiemap_extent __user *fi_extents_start; }; int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 9b3a8d9b17ab..31551e4cb8f3 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -161,6 +161,33 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) } /** + * file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF + * @ref: Pointer to the reference count + * + * Semantically it is equivalent to calling file_ref_put(), but it trades lower + * performance in face of other CPUs also modifying the refcount for higher + * performance when this happens to be the last reference. + * + * For the last reference file_ref_put() issues 2 atomics. One to drop the + * reference and another to transition it to FILE_REF_DEAD. This routine does + * the work in one step, but in order to do it has to pre-read the variable which + * decreases scalability. + * + * Use with close() et al, stick to file_ref_put() by default. + */ +static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) +{ + long old; + + old = atomic_long_read(&ref->refcnt); + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); +} + +/** * file_ref_read - Read the number of file references * @ref: Pointer to the reference count * @@ -174,4 +201,18 @@ static inline unsigned long file_ref_read(file_ref_t *ref) return c >= FILE_REF_RELEASED ? 
0 : c + 1; } +/* + * __file_ref_read_raw - Return the value stored in ref->refcnt + * @ref: Pointer to the reference count + * + * Return: The raw value found in the counter + * + * A hack for file_needs_f_pos_lock(), you probably want to use + * file_ref_read() instead. + */ +static inline unsigned long __file_ref_read_raw(file_ref_t *ref) +{ + return atomic_long_read(&ref->refcnt); +} + #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 0477254bc2d3..f5cf4d35d83e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16) + * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ @@ -469,6 +471,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) .off = 0, \ .imm = BPF_CALL_IMM(FUNC) }) +/* Kfunc call */ + +#define BPF_CALL_KFUNC(OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ + .dst_reg = 0, \ + .src_reg = BPF_PSEUDO_KFUNC_CALL, \ + .off = OFF, \ + .imm = IMM }) + /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ @@ -659,6 +671,11 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +struct bpf_timed_may_goto { + u64 count; + u64 timestamp; +}; + struct sk_filter { refcount_t refcnt; struct rcu_head rcu; @@ -1120,8 +1137,11 @@ bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); bool bpf_jit_supports_private_stack(void); +bool bpf_jit_supports_timed_may_goto(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); +u64 arch_bpf_timed_may_goto(void); +u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *); bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -1179,17 +1199,18 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd, * This does not appear to be a real limitation for existing software. 
*/ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct xdp_buff *xdp, struct bpf_prog *prog); + struct xdp_buff *xdp, const struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *prog); + const struct bpf_prog *prog); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, - struct bpf_prog *prog); + const struct bpf_prog *prog); void xdp_do_flush(void); -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); +void bpf_warn_invalid_xdp_action(const struct net_device *dev, + const struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, @@ -1507,6 +1528,7 @@ struct bpf_sock_ops_kern { void *skb_data_end; u8 op; u8 is_fullsock; + u8 is_locked_tcp_sock; u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h new file mode 100644 index 000000000000..ecd821ed8064 --- /dev/null +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Support utilities for cs_dsp testing. + * + * Copyright (C) 2024 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#include <linux/regmap.h> +#include <linux/firmware/cirrus/wmfw.h> + +struct kunit; +struct cs_dsp_test; +struct cs_dsp_test_local; + +/** + * struct cs_dsp_test - base class for test utilities + * + * @test: Pointer to struct kunit instance. + * @dsp: Pointer to struct cs_dsp instance. + * @local: Private data for each test suite. + */ +struct cs_dsp_test { + struct kunit *test; + struct cs_dsp *dsp; + + struct cs_dsp_test_local *local; + + /* Following members are private */ + bool saw_bus_write; +}; + +/** + * struct cs_dsp_mock_alg_def - Info for creating a mock algorithm entry. + * + * @id: Algorithm ID. + * @ver: Algorithm version. + * @xm_base_words: XM base address in DSP words. + * @xm_size_words: XM size in DSP words. + * @ym_base_words: YM base address in DSP words. + * @ym_size_words: YM size in DSP words. + * @zm_base_words: ZM base address in DSP words. + * @zm_size_words: ZM size in DSP words. + */ +struct cs_dsp_mock_alg_def { + unsigned int id; + unsigned int ver; + unsigned int xm_base_words; + unsigned int xm_size_words; + unsigned int ym_base_words; + unsigned int ym_size_words; + unsigned int zm_base_words; + unsigned int zm_size_words; +}; + +struct cs_dsp_mock_coeff_def { + const char *shortname; + const char *fullname; + const char *description; + u16 type; + u16 flags; + u16 mem_type; + unsigned int offset_dsp_words; + unsigned int length_bytes; +}; + +/** + * struct cs_dsp_mock_xm_header - XM header builder + * + * @test_priv: Pointer to the struct cs_dsp_test. + * @blob_data: Pointer to the created blob data. + * @blob_size_bytes: Size of the data at blob_data.
+ */ +struct cs_dsp_mock_xm_header { + struct cs_dsp_test *test_priv; + void *blob_data; + size_t blob_size_bytes; +}; + +struct cs_dsp_mock_wmfw_builder; +struct cs_dsp_mock_bin_builder; + +extern const unsigned int cs_dsp_mock_adsp2_32bit_sysbase; +extern const unsigned int cs_dsp_mock_adsp2_16bit_sysbase; +extern const unsigned int cs_dsp_mock_halo_core_base; +extern const unsigned int cs_dsp_mock_halo_sysinfo_base; + +extern const struct cs_dsp_region cs_dsp_mock_halo_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_halo_dsp1_region_sizes[]; +extern const struct cs_dsp_region cs_dsp_mock_adsp2_32bit_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_adsp2_32bit_dsp1_region_sizes[]; +extern const struct cs_dsp_region cs_dsp_mock_adsp2_16bit_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_adsp2_16bit_dsp1_region_sizes[]; +int cs_dsp_mock_count_regions(const unsigned int *region_sizes); +unsigned int cs_dsp_mock_size_of_region(const struct cs_dsp *dsp, int mem_type); +unsigned int cs_dsp_mock_base_addr_for_mem(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_addr_inc_per_unpacked_word(struct cs_dsp_test *priv); +unsigned int cs_dsp_mock_reg_block_length_bytes(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_block_length_registers(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_block_length_dsp_words(struct cs_dsp_test *priv, int mem_type); +bool cs_dsp_mock_has_zm(struct cs_dsp_test *priv); +int cs_dsp_mock_packed_to_unpacked_mem_type(int packed_mem_type); +unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_words); +unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, + unsigned int alg_id, + int mem_type); +unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); +void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); +int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); +struct cs_dsp_mock_xm_header *cs_dsp_create_mock_xm_header(struct cs_dsp_test *priv, + const struct cs_dsp_mock_alg_def *algs, + size_t num_algs); + +int cs_dsp_mock_regmap_init(struct cs_dsp_test *priv); +void cs_dsp_mock_regmap_drop_range(struct cs_dsp_test *priv, + unsigned int first_reg, unsigned int last_reg); +void cs_dsp_mock_regmap_drop_regs(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_regs); +void cs_dsp_mock_regmap_drop_bytes(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_bytes); +void cs_dsp_mock_regmap_drop_system_regs(struct cs_dsp_test *priv); +bool cs_dsp_mock_regmap_is_dirty(struct cs_dsp_test *priv, bool drop_system_regs); + +struct cs_dsp_mock_bin_builder *cs_dsp_mock_bin_init(struct cs_dsp_test *priv, + int format_version, + unsigned int fw_version); +void cs_dsp_mock_bin_add_raw_block(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int type, unsigned int offset, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_bin_add_info(struct cs_dsp_mock_bin_builder *builder, + const char *info); +void cs_dsp_mock_bin_add_name(struct cs_dsp_mock_bin_builder *builder, + const char *name); +void cs_dsp_mock_bin_add_patch(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int mem_region, unsigned int reg_addr_offset, + const void *payload_data, size_t payload_len_bytes); +struct firmware *cs_dsp_mock_bin_get_firmware(struct cs_dsp_mock_bin_builder *builder); + 
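A hedged sketch of how a KUnit test might drive the mock bin builder declared above; the algorithm ID, version, payload and the WMFW_ADSP2_YM memory region below are illustrative choices, not values from the patch:

#include <linux/firmware.h>
#include <linux/firmware/cirrus/cs_dsp_test_utils.h>

/* Illustrative only: build a mock coefficient (bin) image with one patch block. */
static void example_build_mock_bin(struct cs_dsp_test *priv)
{
	static const u32 payload[2] = { 0x1234, 0x5678 };	/* arbitrary test data */
	struct cs_dsp_mock_bin_builder *builder;
	struct firmware *fw;

	builder = cs_dsp_mock_bin_init(priv, 1, 0x10000);	/* format 1, made-up fw version */
	cs_dsp_mock_bin_add_patch(builder, 0xfafa, 1, WMFW_ADSP2_YM, 0,
				  payload, sizeof(payload));
	fw = cs_dsp_mock_bin_get_firmware(builder);
	/* fw can then be handed to cs_dsp as the coefficient file under test */
}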
+struct cs_dsp_mock_wmfw_builder *cs_dsp_mock_wmfw_init(struct cs_dsp_test *priv, + int format_version); +void cs_dsp_mock_wmfw_add_raw_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_wmfw_add_info(struct cs_dsp_mock_wmfw_builder *builder, + const char *info); +void cs_dsp_mock_wmfw_add_data_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_wmfw_start_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder, + unsigned int alg_id, + const char *name, + const char *description); +void cs_dsp_mock_wmfw_add_coeff_desc(struct cs_dsp_mock_wmfw_builder *builder, + const struct cs_dsp_mock_coeff_def *def); +void cs_dsp_mock_wmfw_end_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder); +struct firmware *cs_dsp_mock_wmfw_get_firmware(struct cs_dsp_mock_wmfw_builder *builder); +int cs_dsp_mock_wmfw_format_version(struct cs_dsp_mock_wmfw_builder *builder); diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 4621aec0328c..983e1591bbba 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -105,6 +105,14 @@ bool qcom_scm_ice_available(void); int qcom_scm_ice_invalidate_key(u32 index); int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, enum qcom_scm_ice_cipher cipher, u32 data_unit_size); +bool qcom_scm_has_wrapped_key_support(void); +int qcom_scm_derive_sw_secret(const u8 *eph_key, size_t eph_key_size, + u8 *sw_secret, size_t sw_secret_size); +int qcom_scm_generate_ice_key(u8 *lt_key, size_t lt_key_size); +int qcom_scm_prepare_ice_key(const u8 *lt_key, size_t lt_key_size, + u8 *eph_key, size_t eph_key_size); +int qcom_scm_import_ice_key(const u8 *raw_key, size_t raw_key_size, + u8 *lt_key, size_t lt_key_size); bool qcom_scm_hdcp_available(void); int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h new file mode 100644 index 000000000000..76255b5d06b1 --- /dev/null +++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2020 Samsung Electronics Co., Ltd. + * Copyright 2020 Google LLC. + * Copyright 2024 Linaro Ltd. 
+ */ + +#ifndef __EXYNOS_ACPM_PROTOCOL_H +#define __EXYNOS_ACPM_PROTOCOL_H + +#include <linux/types.h> + +struct acpm_handle; + +struct acpm_pmic_ops { + int (*read_reg)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 *buf); + int (*bulk_read)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, u8 *buf); + int (*write_reg)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value); + int (*bulk_write)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, const u8 *buf); + int (*update_reg)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value, u8 mask); +}; + +struct acpm_ops { + struct acpm_pmic_ops pmic_ops; +}; + +/** + * struct acpm_handle - Reference to an initialized protocol instance + * @ops: operations supported by this handle + */ +struct acpm_handle { + struct acpm_ops ops; +}; + +struct device; + +const struct acpm_handle *devm_acpm_get_by_phandle(struct device *dev, + const char *property); +#endif /* __EXYNOS_ACPM_PROTOCOL_H */ diff --git a/include/linux/firmware/thead/thead,th1520-aon.h b/include/linux/firmware/thead/thead,th1520-aon.h new file mode 100644 index 000000000000..dae132b66873 --- /dev/null +++ b/include/linux/firmware/thead/thead,th1520-aon.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Alibaba Group Holding Limited. + */ + +#ifndef _THEAD_AON_H +#define _THEAD_AON_H + +#include <linux/device.h> +#include <linux/types.h> + +#define AON_RPC_MSG_MAGIC (0xef) +#define TH1520_AON_RPC_VERSION 2 +#define TH1520_AON_RPC_MSG_NUM 7 + +struct th1520_aon_chan; + +enum th1520_aon_rpc_svc { + TH1520_AON_RPC_SVC_UNKNOWN = 0, + TH1520_AON_RPC_SVC_PM = 1, + TH1520_AON_RPC_SVC_MISC = 2, + TH1520_AON_RPC_SVC_AVFS = 3, + TH1520_AON_RPC_SVC_SYS = 4, + TH1520_AON_RPC_SVC_WDG = 5, + TH1520_AON_RPC_SVC_LPM = 6, + TH1520_AON_RPC_SVC_MAX = 0x3F, +}; + +enum th1520_aon_misc_func { + TH1520_AON_MISC_FUNC_UNKNOWN = 0, + TH1520_AON_MISC_FUNC_SET_CONTROL = 1, + TH1520_AON_MISC_FUNC_GET_CONTROL = 2, + TH1520_AON_MISC_FUNC_REGDUMP_CFG = 3, +}; + +enum th1520_aon_wdg_func { + TH1520_AON_WDG_FUNC_UNKNOWN = 0, + TH1520_AON_WDG_FUNC_START = 1, + TH1520_AON_WDG_FUNC_STOP = 2, + TH1520_AON_WDG_FUNC_PING = 3, + TH1520_AON_WDG_FUNC_TIMEOUTSET = 4, + TH1520_AON_WDG_FUNC_RESTART = 5, + TH1520_AON_WDG_FUNC_GET_STATE = 6, + TH1520_AON_WDG_FUNC_POWER_OFF = 7, + TH1520_AON_WDG_FUNC_AON_WDT_ON = 8, + TH1520_AON_WDG_FUNC_AON_WDT_OFF = 9, +}; + +enum th1520_aon_sys_func { + TH1520_AON_SYS_FUNC_UNKNOWN = 0, + TH1520_AON_SYS_FUNC_AON_RESERVE_MEM = 1, +}; + +enum th1520_aon_lpm_func { + TH1520_AON_LPM_FUNC_UNKNOWN = 0, + TH1520_AON_LPM_FUNC_REQUIRE_STR = 1, + TH1520_AON_LPM_FUNC_RESUME_STR = 2, + TH1520_AON_LPM_FUNC_REQUIRE_STD = 3, + TH1520_AON_LPM_FUNC_CPUHP = 4, + TH1520_AON_LPM_FUNC_REGDUMP_CFG = 5, +}; + +enum th1520_aon_pm_func { + TH1520_AON_PM_FUNC_UNKNOWN = 0, + TH1520_AON_PM_FUNC_SET_RESOURCE_REGULATOR = 1, + TH1520_AON_PM_FUNC_GET_RESOURCE_REGULATOR = 2, + TH1520_AON_PM_FUNC_SET_RESOURCE_POWER_MODE = 3, + TH1520_AON_PM_FUNC_PWR_SET = 4, + TH1520_AON_PM_FUNC_PWR_GET = 5, + TH1520_AON_PM_FUNC_CHECK_FAULT = 6, + TH1520_AON_PM_FUNC_GET_TEMPERATURE = 7, +}; + +struct th1520_aon_rpc_msg_hdr { + u8 ver; /* version of msg hdr */ + u8 size; /* msg size, unit in bytes; the size includes the rpc msg header itself */ + u8 svc; /* rpc main service id */ + u8 func; /* rpc sub func id of
specific service, sent by caller */ +} __packed __aligned(1); + +struct th1520_aon_rpc_ack_common { + struct th1520_aon_rpc_msg_hdr hdr; + u8 err_code; +} __packed __aligned(1); + +#define RPC_SVC_MSG_TYPE_DATA 0 +#define RPC_SVC_MSG_TYPE_ACK 1 +#define RPC_SVC_MSG_NEED_ACK 0 +#define RPC_SVC_MSG_NO_NEED_ACK 1 + +#define RPC_GET_VER(MESG) ((MESG)->ver) +#define RPC_SET_VER(MESG, VER) ((MESG)->ver = (VER)) +#define RPC_GET_SVC_ID(MESG) ((MESG)->svc & 0x3F) +#define RPC_SET_SVC_ID(MESG, ID) ((MESG)->svc |= 0x3F & (ID)) +#define RPC_GET_SVC_FLAG_MSG_TYPE(MESG) (((MESG)->svc & 0x80) >> 7) +#define RPC_SET_SVC_FLAG_MSG_TYPE(MESG, TYPE) ((MESG)->svc |= (TYPE) << 7) +#define RPC_GET_SVC_FLAG_ACK_TYPE(MESG) (((MESG)->svc & 0x40) >> 6) +#define RPC_SET_SVC_FLAG_ACK_TYPE(MESG, ACK) ((MESG)->svc |= (ACK) << 6) + +#define RPC_SET_BE64(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + u64 _set_data = (SET_DATA); \ + data[_offset + 7] = _set_data & 0xFF; \ + data[_offset + 6] = (_set_data & 0xFF00) >> 8; \ + data[_offset + 5] = (_set_data & 0xFF0000) >> 16; \ + data[_offset + 4] = (_set_data & 0xFF000000) >> 24; \ + data[_offset + 3] = (_set_data & 0xFF00000000) >> 32; \ + data[_offset + 2] = (_set_data & 0xFF0000000000) >> 40; \ + data[_offset + 1] = (_set_data & 0xFF000000000000) >> 48; \ + data[_offset + 0] = (_set_data & 0xFF00000000000000) >> 56; \ + } while (0) + +#define RPC_SET_BE32(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + u64 _set_data = (SET_DATA); \ + data[_offset + 3] = (_set_data) & 0xFF; \ + data[_offset + 2] = (_set_data & 0xFF00) >> 8; \ + data[_offset + 1] = (_set_data & 0xFF0000) >> 16; \ + data[_offset + 0] = (_set_data & 0xFF000000) >> 24; \ + } while (0) + +#define RPC_SET_BE16(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + u64 _set_data = (SET_DATA); \ + data[_offset + 1] = (_set_data) & 0xFF; \ + data[_offset + 0] = (_set_data & 0xFF00) >> 8; \ + } while (0) + +#define RPC_SET_U8(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + data[OFFSET] = (SET_DATA) & 0xFF; \ + } while (0) + +#define RPC_GET_BE64(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + *(u32 *)(PTR) = \ + (data[_offset + 7] | data[_offset + 6] << 8 | \ + data[_offset + 5] << 16 | data[_offset + 4] << 24 | \ + data[_offset + 3] << 32 | data[_offset + 2] << 40 | \ + data[_offset + 1] << 48 | data[_offset + 0] << 56); \ + } while (0) + +#define RPC_GET_BE32(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + *(u32 *)(PTR) = \ + (data[_offset + 3] | data[_offset + 2] << 8 | \ + data[_offset + 1] << 16 | data[_offset + 0] << 24); \ + } while (0) + +#define RPC_GET_BE16(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + *(u16 *)(PTR) = (data[_offset + 1] | data[_offset + 0] << 8); \ + } while (0) + +#define RPC_GET_U8(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + *(u8 *)(PTR) = (data[OFFSET]); \ + } while (0) + +/* + * Defines for SC PM Power Mode + */ +#define TH1520_AON_PM_PW_MODE_OFF 0 /* Power off */ +#define TH1520_AON_PM_PW_MODE_STBY 1 /* Power in standby */ +#define TH1520_AON_PM_PW_MODE_LP 2 /* Power in low-power */ +#define TH1520_AON_PM_PW_MODE_ON 3 /* Power on */ + +/* + * Defines for AON power islands + */ +#define TH1520_AON_AUDIO_PD 0 +#define TH1520_AON_VDEC_PD 1 +#define TH1520_AON_NPU_PD 2 +#define TH1520_AON_VENC_PD 3 
+#define TH1520_AON_GPU_PD 4 +#define TH1520_AON_DSP0_PD 5 +#define TH1520_AON_DSP1_PD 6 + +struct th1520_aon_chan *th1520_aon_init(struct device *dev); +void th1520_aon_deinit(struct th1520_aon_chan *aon_chan); + +int th1520_aon_call_rpc(struct th1520_aon_chan *aon_chan, void *msg); +int th1520_aon_power_update(struct th1520_aon_chan *aon_chan, u16 rsrc, + bool power_on); + +#endif /* _THEAD_AON_H */ diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 3abe614ef5f0..45ad2408a80c 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -15,6 +15,7 @@ #define _LINUX_FOLIO_QUEUE_H #include <linux/pagevec.h> +#include <linux/mm.h> /* * Segment in a queue of running buffers. Each segment can hold a number of @@ -37,16 +38,20 @@ struct folio_queue { #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif + unsigned int rreq_id; + unsigned int debug_id; }; /** * folioq_init - Initialise a folio queue segment * @folioq: The segment to initialise + * @rreq_id: The request identifier to use in tracelines. * - * Initialise a folio queue segment. Note that the folio pointers are - * left uninitialised. + * Initialise a folio queue segment and set an identifier to be used in traces. + * + * Note that the folio pointers are left uninitialised. */ -static inline void folioq_init(struct folio_queue *folioq) +static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) { folio_batch_init(&folioq->vec); folioq->next = NULL; @@ -54,6 +59,8 @@ static inline void folioq_init(struct folio_queue *folioq) folioq->marks = 0; folioq->marks2 = 0; folioq->marks3 = 0; + folioq->rreq_id = rreq_id; + folioq->debug_id = 0; } /** @@ -210,13 +217,6 @@ static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks3); } -static inline unsigned int __folio_order(struct folio *folio) -{ - if (!folio_test_large(folio)) - return 0; - return folio->_flags_1 & 0xff; -} - /** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to @@ -235,7 +235,7 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; - folioq->orders[slot] = __folio_order(folio); + folioq->orders[slot] = folio_order(folio); return slot; } @@ -257,7 +257,7 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; - folioq->orders[slot] = __folio_order(folio); + folioq->orders[slot] = folio_order(folio); folioq_mark(folioq, slot); return slot; } diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index f39869588117..702099f08929 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -5,47 +5,68 @@ #include <linux/compiler.h> #include <linux/ftrace.h> -#include <linux/rethook.h> +#include <linux/rcupdate.h> +#include <linux/refcount.h> +#include <linux/slab.h> struct fprobe; - typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *entry_data); typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *entry_data); /** + * struct fprobe_hlist_node - address based hash list node for fprobe. + * + * @hlist: The hlist node for address search hash table. 
+ * @addr: One of the probing address of @fp. + * @fp: The fprobe which owns this. + */ +struct fprobe_hlist_node { + struct hlist_node hlist; + unsigned long addr; + struct fprobe *fp; +}; + +/** + * struct fprobe_hlist - hash list nodes for fprobe. + * + * @hlist: The hlist node for existence checking hash table. + * @rcu: rcu_head for RCU deferred release. + * @fp: The fprobe which owns this fprobe_hlist. + * @size: The size of @array. + * @array: The fprobe_hlist_node for each address to probe. + */ +struct fprobe_hlist { + struct hlist_node hlist; + struct rcu_head rcu; + struct fprobe *fp; + int size; + struct fprobe_hlist_node array[] __counted_by(size); +}; + +/** * struct fprobe - ftrace based probe. - * @ops: The ftrace_ops. + * * @nmissed: The counter for missing events. * @flags: The status flag. - * @rethook: The rethook data structure. (internal data) * @entry_data_size: The private data storage size. - * @nr_maxactive: The max number of active functions. * @entry_handler: The callback function for function entry. * @exit_handler: The callback function for function exit. + * @hlist_array: The fprobe_hlist for fprobe search from IP hash table. */ struct fprobe { -#ifdef CONFIG_FUNCTION_TRACER - /* - * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too. - * But user of fprobe may keep embedding the struct fprobe on their own - * code. To avoid build error, this will keep the fprobe data structure - * defined here, but remove ftrace_ops data structure. - */ - struct ftrace_ops ops; -#endif unsigned long nmissed; unsigned int flags; - struct rethook *rethook; size_t entry_data_size; - int nr_maxactive; fprobe_entry_cb entry_handler; fprobe_exit_cb exit_handler; + + struct fprobe_hlist *hlist_array; }; /* This fprobe is soft-disabled. 
*/ @@ -121,4 +142,9 @@ static inline void enable_fprobe(struct fprobe *fp) fp->flags &= ~FPROBE_FL_DISABLED; } +/* The entry data size is 4 bits (=16) * sizeof(long) in maximum */ +#define FPROBE_DATA_SIZE_BITS 4 +#define MAX_FPROBE_DATA_SIZE_WORD ((1L << FPROBE_DATA_SIZE_BITS) - 1) +#define MAX_FPROBE_DATA_SIZE (MAX_FPROBE_DATA_SIZE_WORD * sizeof(long)) + #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index f7efc6866ebc..1fe7bf10d442 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FS_H #define _LINUX_FS_H +#include <linux/vfsdebug.h> #include <linux/linkage.h> #include <linux/wait_bit.h> #include <linux/kdev_t.h> @@ -173,13 +174,20 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) -/* FMODE_* bit 24 */ - /* File is embedded in backing_file object */ -#define FMODE_BACKING ((__force fmode_t)(1 << 25)) +#define FMODE_BACKING ((__force fmode_t)(1 << 24)) -/* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) +/* + * Together with FMODE_NONOTIFY_PERM defines which fsnotify events shouldn't be + * generated (see below) + */ +#define FMODE_NONOTIFY ((__force fmode_t)(1 << 25)) + +/* + * Together with FMODE_NONOTIFY defines which fsnotify events shouldn't be + * generated (see below) + */ +#define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) @@ -191,6 +199,31 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) /* + * The two FMODE_NONOTIFY* define which fsnotify events should not be generated + * for a file. These are the possible values of (f->f_mode & + * FMODE_FSNOTIFY_MASK) and their meaning: + * + * FMODE_NONOTIFY - suppress all (incl. non-permission) events. + * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events. + * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events. + */ +#define FMODE_FSNOTIFY_MASK \ + (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM) + +#define FMODE_FSNOTIFY_NONE(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY) +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +#define FMODE_FSNOTIFY_PERM(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == 0 || \ + (mode & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)) +#define FMODE_FSNOTIFY_HSM(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == 0) +#else +#define FMODE_FSNOTIFY_PERM(mode) 0 +#define FMODE_FSNOTIFY_HSM(mode) 0 +#endif + +/* * Attribute flags. These should be or-ed together to figure out what * has been changed! */ @@ -322,6 +355,7 @@ struct readahead_control; #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC +#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) @@ -348,6 +382,7 @@ struct readahead_control; #define IOCB_DIO_CALLER_COMP (1 << 22) /* kiocb is a read or write operation submitted by fs/aio.c. 
*/ #define IOCB_AIO_RW (1 << 23) +#define IOCB_HAS_METADATA (1 << 24) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -356,7 +391,8 @@ struct readahead_control; { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ - { IOCB_ATOMIC, "ATOMIC"}, \ + { IOCB_ATOMIC, "ATOMIC" }, \ + { IOCB_DONTCACHE, "DONTCACHE" }, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ @@ -626,6 +662,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 #define IOP_MGTIME 0x0020 +#define IOP_CACHED_LINK 0x0040 /* * Keep mostly read-only and often accessed (especially for @@ -723,7 +760,10 @@ struct inode { }; struct file_lock_context *i_flctx; struct address_space i_data; - struct list_head i_devices; + union { + struct list_head i_devices; + int i_linklen; + }; union { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; @@ -749,6 +789,15 @@ struct inode { void *i_private; /* fs or device private pointer */ } __randomize_layout; +static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) +{ + VFS_WARN_ON_INODE(strlen(link) != linklen, inode); + VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode); + inode->i_link = link; + inode->i_linklen = linklen; + inode->i_opflags |= IOP_CACHED_LINK; +} + /* * Get bit address from inode->i_state to use with wait_var_event() * infrastructre. @@ -1008,7 +1057,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) /** * struct file - Represents a file - * @f_ref: reference count * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. * @f_mode: FMODE_* flags often used in hotpaths * @f_op: file operations @@ -1018,12 +1066,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_flags: file flags * @f_iocb_flags: iocb flags * @f_cred: stashed credentials of creator/opener + * @f_owner: file owner * @f_path: path of the file * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position * @f_security: LSM security context of this file - * @f_owner: file owner * @f_wb_err: writeback error * @f_sb_err: per sb writeback errors * @f_ep: link of all epoll hooks for this file @@ -1031,9 +1079,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_llist: work queue entrypoint * @f_ra: file's readahead state * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) 
+ * @f_ref: reference count */ struct file { - file_ref_t f_ref; spinlock_t f_lock; fmode_t f_mode; const struct file_operations *f_op; @@ -1043,6 +1091,7 @@ struct file { unsigned int f_flags; unsigned int f_iocb_flags; const struct cred *f_cred; + struct fown_struct *f_owner; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; union { @@ -1056,7 +1105,6 @@ struct file { void *f_security; #endif /* --- cacheline 2 boundary (128 bytes) --- */ - struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err; #ifdef CONFIG_EPOLL @@ -1068,6 +1116,7 @@ struct file { struct file_ra_state f_ra; freeptr_t f_freeptr; }; + file_ref_t f_ref; /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ @@ -1199,11 +1248,19 @@ extern int send_sigurg(struct file *file); #define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ -#define SB_ENC_STRICT_MODE_FL (1 << 0) +#define SB_ENC_STRICT_MODE_FL (1 << 0) +#define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1) #define sb_has_strict_encoding(sb) \ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) +#if IS_ENABLED(CONFIG_UNICODE) +#define sb_no_casefold_compat_fallback(sb) \ + (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL) +#else +#define sb_no_casefold_compat_fallback(sb) (1) +#endif + /* * Umount options */ @@ -1232,6 +1289,7 @@ extern int send_sigurg(struct file *file); #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ +#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ /* Possible states of 'frozen' field */ enum { @@ -1921,8 +1979,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, */ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int vfs_mkdir(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t); +struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, + struct dentry *, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, @@ -1979,7 +2037,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); -extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, @@ -2127,6 +2185,8 @@ struct file_operations { #define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) +/* File system supports uncached read/write buffered IO */ +#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -2149,8 +2209,8 @@ struct inode_operations { int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); - int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *, - umode_t); + struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *, + struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) 
(struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); @@ -2284,6 +2344,7 @@ struct super_operations { #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ +#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2340,6 +2401,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) @@ -2554,6 +2616,7 @@ struct file_system_type { #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_MGTIME 64 /* FS uses multigrain timestamps */ +#define FS_LBS 128 /* FS supports LBS */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2591,9 +2654,6 @@ static inline bool is_mgtime(const struct inode *inode) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -extern struct dentry *mount_single(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2732,13 +2792,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) return mnt_idmap(mnt) != &nop_mnt_idmap; } -extern long vfs_truncate(const struct path *, loff_t); +int vfs_truncate(const struct path *, loff_t); int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); -extern long do_sys_open(int dfd, const char __user *filename, int flags, - umode_t mode); +int do_sys_open(int dfd, const char __user *filename, int flags, + umode_t mode); extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(const struct path *, @@ -2751,6 +2811,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); @@ -2787,7 +2849,10 @@ extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); -extern struct filename *getname(const char __user *); +static inline struct filename *getname(const char __user *name) +{ + return getname_flags(name, 0); +} extern struct filename *getname_kernel(const char *); extern struct filename *__getname_maybe_null(const char __user *); static inline struct 
filename *getname_maybe_null(const char __user *name, int flags) @@ -2800,6 +2865,13 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f return __getname_maybe_null(name); } extern void putname(struct filename *name); +DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T)) + +static inline struct filename *refname(struct filename *name) +{ + atomic_inc(&name->refcnt); + return name; +} extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); @@ -2874,6 +2946,8 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); +int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, + loff_t end); static inline int file_write_and_wait(struct file *file) { @@ -2906,6 +2980,11 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); if (ret) return ret; + } else if (iocb->ki_flags & IOCB_DONTCACHE) { + struct address_space *mapping = iocb->ki_filp->f_mapping; + + filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, + iocb->ki_pos - 1); } return count; @@ -3059,6 +3138,34 @@ static inline void allow_write_access(struct file *file) if (file) atomic_inc(&file_inode(file)->i_writecount); } + +/* + * Do not prevent write to executable file when watched by pre-content events. + * + * Note that FMODE_FSNOTIFY_HSM mode is set depending on pre-content watches at + * the time of file open and remains constant for entire lifetime of the file, + * so if pre-content watches are added post execution or removed before the end + * of the execution, it will not cause i_writecount reference leak. 
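A few hunks above, this fs.h change turns getname() into a static inline wrapper around getname_flags() and adds a DEFINE_FREE(putname, ...) cleanup class plus refname(). A minimal sketch of the scope-based cleanup that class enables is shown below; do_example_lookup() and its body are purely illustrative and not part of this patch (assumes <linux/cleanup.h> and <linux/fs.h>):

/* Illustrative only: getname() paired with automatic putname() on scope exit. */
static int do_example_lookup(const char __user *pathname)
{
	struct filename *name __free(putname) = getname(pathname);

	if (IS_ERR(name))
		return PTR_ERR(name);

	/* use name->name here; putname() runs automatically when name goes out of scope */
	return 0;
}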
+ */ +static inline int exe_file_deny_write_access(struct file *exe_file) +{ + if (unlikely(FMODE_FSNOTIFY_HSM(exe_file->f_mode))) + return 0; + return deny_write_access(exe_file); +} +static inline void exe_file_allow_write_access(struct file *exe_file) +{ + if (unlikely(!exe_file || FMODE_FSNOTIFY_HSM(exe_file->f_mode))) + return; + allow_write_access(exe_file); +} + +static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode) +{ + file->f_mode &= ~FMODE_FSNOTIFY_MASK; + file->f_mode |= mode; +} + static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; @@ -3198,7 +3305,11 @@ static inline void __iget(struct inode *inode) extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); -extern struct inode *new_inode_pseudo(struct super_block *sb); +struct inode *alloc_inode(struct super_block *sb); +static inline struct inode *new_inode_pseudo(struct super_block *sb) +{ + return alloc_inode(sb); +} extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); @@ -3351,8 +3462,10 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int readlink_copy(char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); +extern const char *page_get_link_raw(struct dentry *, struct inode *, + struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); @@ -3439,6 +3552,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; @@ -3613,6 +3728,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) return -EOPNOTSUPP; } + if (flags & RWF_DONTCACHE) { + /* file system must support it */ + if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE)) + return -EOPNOTSUPP; + /* DAX mappings not supported */ + if (IS_DAX(ki->ki_filp->f_mapping->host)) + return -EOPNOTSUPP; + } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; @@ -3706,11 +3829,9 @@ struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) -#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ - (flag & __FMODE_NONOTIFY))) +#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) static inline bool is_sxid(umode_t mode) { diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 4b4bfef6f053..a19e4bd32e4d 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -144,8 +144,6 @@ extern void 
put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); extern void fc_drop_locked(struct fs_context *fc); -int reconfigure_single(struct super_block *s, - int flags, void *data); extern int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 3cef566088fc..53e566efd5fd 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -84,6 +84,8 @@ extern int fs_lookup_param(struct fs_context *fc, extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); +extern const struct constant_table bool_names[]; + #ifdef CONFIG_VALIDATE_FS_PARSER extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, int low, int high, int special); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..266e6c9e6f83 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -628,7 +628,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); } diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 772f822dc6b8..56fad33043d5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -192,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) @@ -309,10 +310,8 @@ static inline void fscrypt_prepare_dentry(struct dentry *dentry, /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); -struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, - unsigned int len, - unsigned int offs, - gfp_t gfp_flags); +struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, + size_t len, size_t offs, gfp_t gfp_flags); int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); @@ -479,10 +478,8 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } -static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, - unsigned int len, - unsigned int offs, - gfp_t gfp_flags) +static inline struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, + size_t len, size_t offs, gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } @@ -711,8 +708,8 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir, return 0; } -static inline int fscrypt_d_revalidate(struct dentry *dentry, - unsigned int flags) +static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return 1; } diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index c90ec889bfc2..897d6211c163 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -417,8 +417,6 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, void fsl_mc_portal_free(struct fsl_mc_io *mc_io); -int fsl_mc_portal_reset(struct fsl_mc_io *mc_io); - int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, enum fsl_mc_pool_type pool_type, struct 
fsl_mc_device **new_mc_adev); @@ -438,21 +436,21 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, extern const struct bus_type fsl_mc_bus_type; -extern struct device_type fsl_mc_bus_dprc_type; -extern struct device_type fsl_mc_bus_dpni_type; -extern struct device_type fsl_mc_bus_dpio_type; -extern struct device_type fsl_mc_bus_dpsw_type; -extern struct device_type fsl_mc_bus_dpbp_type; -extern struct device_type fsl_mc_bus_dpcon_type; -extern struct device_type fsl_mc_bus_dpmcp_type; -extern struct device_type fsl_mc_bus_dpmac_type; -extern struct device_type fsl_mc_bus_dprtc_type; -extern struct device_type fsl_mc_bus_dpseci_type; -extern struct device_type fsl_mc_bus_dpdmux_type; -extern struct device_type fsl_mc_bus_dpdcei_type; -extern struct device_type fsl_mc_bus_dpaiop_type; -extern struct device_type fsl_mc_bus_dpci_type; -extern struct device_type fsl_mc_bus_dpdmai_type; +extern const struct device_type fsl_mc_bus_dprc_type; +extern const struct device_type fsl_mc_bus_dpni_type; +extern const struct device_type fsl_mc_bus_dpio_type; +extern const struct device_type fsl_mc_bus_dpsw_type; +extern const struct device_type fsl_mc_bus_dpbp_type; +extern const struct device_type fsl_mc_bus_dpcon_type; +extern const struct device_type fsl_mc_bus_dpmcp_type; +extern const struct device_type fsl_mc_bus_dpmac_type; +extern const struct device_type fsl_mc_bus_dprtc_type; +extern const struct device_type fsl_mc_bus_dpseci_type; +extern const struct device_type fsl_mc_bus_dpdmux_type; +extern const struct device_type fsl_mc_bus_dpdcei_type; +extern const struct device_type fsl_mc_bus_dpaiop_type; +extern const struct device_type fsl_mc_bus_dpci_type; +extern const struct device_type fsl_mc_bus_dpdmai_type; static inline bool is_fsl_mc_bus_dprc(const struct fsl_mc_device *mc_dev) { diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 278620e063ab..454d8e466958 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -108,38 +108,35 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } -static inline int fsnotify_file(struct file *file, __u32 mask) +static inline int fsnotify_path(const struct path *path, __u32 mask) { - const struct path *path; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); +} +static inline int fsnotify_file(struct file *file, __u32 mask) +{ /* * FMODE_NONOTIFY are fds generated by fanotify itself which should not * generate new events. We also don't want to generate events for * FMODE_PATH fds (involves open & close events) as they are just * handle creation / destruction events and not "real" file events. 
*/ - if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) + if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; - path = &file->f_path; - /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ - if (mask & ALL_FSNOTIFY_PERM_EVENTS && - !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, - FSNOTIFY_PRIO_CONTENT)) - return 0; - - return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); + return fsnotify_path(&file->f_path, mask); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + +void file_set_fsnotify_mode_from_watchers(struct file *file); + /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { - __u32 fsnotify_mask = FS_ACCESS_PERM; - /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection @@ -147,14 +144,64 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, */ lockdep_assert_once(file_write_not_started(file)); + if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS))) + return 0; + + if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) + return 0; + + /* + * read()/write() and other types of access generate pre-content events. + */ + if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { + int ret = fsnotify_pre_content(&file->f_path, ppos, count); + + if (ret) + return ret; + } + if (!(perm_mask & MAY_READ)) return 0; - return fsnotify_file(file, fsnotify_mask); + /* + * read() also generates the legacy FS_ACCESS_PERM event, so content + * scanners can inspect the content filled by pre-content event. + */ + return fsnotify_path(&file->f_path, FS_ACCESS_PERM); +} + +/* + * fsnotify_mmap_perm - permission hook before mmap of file range + */ +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + /* + * mmap() generates only pre-content events. 
+ */ + if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode))) + return 0; + + return fsnotify_pre_content(&file->f_path, &off, len); } /* - * fsnotify_file_perm - permission hook before file access + * fsnotify_truncate_perm - permission hook before file truncate + */ +static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) +{ + struct inode *inode = d_inode(path->dentry); + + if (!(inode->i_sb->s_iflags & SB_I_ALLOW_HSM) || + !fsnotify_sb_has_priority_watchers(inode->i_sb, + FSNOTIFY_PRIO_PRE_CONTENT)) + return 0; + + return fsnotify_pre_content(path, &length, 0); +} + +/* + * fsnotify_file_perm - permission hook before file access (unknown range) */ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { @@ -168,22 +215,40 @@ static inline int fsnotify_open_perm(struct file *file) { int ret; + if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) + return 0; + if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) return ret; } - return fsnotify_file(file, FS_OPEN_PERM); + return fsnotify_path(&file->f_path, FS_OPEN_PERM); } #else +static inline void file_set_fsnotify_mode_from_watchers(struct file *file) +{ +} + static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { return 0; } +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + return 0; +} + +static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) +{ + return 0; +} + static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; @@ -255,6 +320,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) __fsnotify_vfsmount_delete(mnt); } +static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + __fsnotify_mntns_delete(mntns); +} + /* * fsnotify_inoderemove - an inode is going away */ @@ -463,4 +533,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, NULL, NULL, NULL, 0); } +static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); +} + +static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_DETACH, ns, mnt); +} + +static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_MOVE, ns, mnt); +} + #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3ecf7768e577..fc27b53c58c2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -55,6 +55,13 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +/* #define FS_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */ + +#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FS_MNT_DETACH 0x02000000 /* Mount was detached */ +#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH) /* * Set on inode mark that cares about things that happen to its children. 
@@ -77,8 +84,17 @@ */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) -#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC_PERM) +/* Mount namespace events */ +#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH) + +/* Content events can be used to inspect file content */ +#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \ + FS_ACCESS_PERM) +/* Pre-content events can be used to fill file content */ +#define FSNOTIFY_PRE_CONTENT_EVENTS (FS_PRE_ACCESS) + +#define ALL_FSNOTIFY_PERM_EVENTS (FSNOTIFY_CONTENT_PERM_EVENTS | \ + FSNOTIFY_PRE_CONTENT_EVENTS) /* * This is a list of all events that may get sent to a parent that is watching @@ -99,6 +115,7 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FSNOTIFY_MNT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ @@ -285,9 +302,11 @@ static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, + FSNOTIFY_EVENT_FILE_RANGE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, + FSNOTIFY_EVENT_MNT, FSNOTIFY_EVENT_ERROR, }; @@ -297,6 +316,22 @@ struct fs_error_report { struct super_block *sb; }; +struct file_range { + const struct path *path; + loff_t pos; + size_t count; +}; + +static inline const struct path *file_range_path(const struct file_range *range) +{ + return range->path; +} + +struct fsnotify_mnt { + const struct mnt_namespace *ns; + u64 mnt_id; +}; + static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { @@ -306,6 +341,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); + case FSNOTIFY_EVENT_FILE_RANGE: + return d_inode(file_range_path(data)->dentry); case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *)data)->inode; default: @@ -321,6 +358,8 @@ static inline struct dentry *fsnotify_data_dentry(const void *data, int data_typ return (struct dentry *)data; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data)->dentry; default: return NULL; } @@ -332,6 +371,8 @@ static inline const struct path *fsnotify_data_path(const void *data, switch (data_type) { case FSNOTIFY_EVENT_PATH: return data; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data); default: return NULL; } @@ -347,6 +388,8 @@ static inline struct super_block *fsnotify_data_sb(const void *data, return ((struct dentry *)data)->d_sb; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry->d_sb; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data)->dentry->d_sb; case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *) data)->sb; default: @@ -354,6 +397,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data, } } +static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_MNT: + return data; + default: + return NULL; + } +} + +static inline u64 fsnotify_data_mnt_id(const void *data, int data_type) +{ + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + + return mnt_data ? 
mnt_data->mnt_id : 0; +} + static inline struct fs_error_report *fsnotify_data_error_report( const void *data, int data_type) @@ -366,6 +427,18 @@ static inline struct fs_error_report *fsnotify_data_error_report( } } +static inline const struct file_range *fsnotify_data_file_range( + const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_FILE_RANGE: + return (struct file_range *)data; + default: + return NULL; + } +} + /* * Index to merged marks iterator array that correlates to a type of watch. * The type of watched object can be deduced from the iterator type, but not @@ -379,6 +452,7 @@ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, FSNOTIFY_ITER_TYPE_INODE2, + FSNOTIFY_ITER_TYPE_MNTNS, FSNOTIFY_ITER_TYPE_COUNT }; @@ -388,6 +462,7 @@ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, + FSNOTIFY_OBJ_TYPE_MNTNS, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; @@ -572,8 +647,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns); extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); +extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { @@ -830,21 +907,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); @@ -854,9 +916,17 @@ static inline void fsnotify_init_event(struct fsnotify_event *event) { INIT_LIST_HEAD(&event->list); } +int fsnotify_pre_content(const struct path *path, const loff_t *ppos, + size_t count); #else +static inline int fsnotify_pre_content(const struct path *path, + const loff_t *ppos, size_t count) +{ + return 0; +} + static inline int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie) @@ -879,6 +949,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{} + static inline void fsnotify_sb_free(struct super_block *sb) {} @@ -893,6 +966,9 @@ static inline u32 
fsnotify_get_cookie(void) static inline void fsnotify_unmount_inodes(struct super_block *sb) {} +static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aa9ddd1e4bb6..fbabc3d848b3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,8 @@ struct dyn_ftrace; char *arch_ftrace_match_adjust(char *str, const char *search); -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL -struct fgraph_ret_regs; -unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs); +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS +unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs); #else unsigned long ftrace_return_to_handler(unsigned long frame_pointer); #endif @@ -134,6 +133,13 @@ extern int ftrace_enabled; * Also, architecture dependent fields can be used for internal process. * (e.g. orig_ax on x86_64) * + * Basically, ftrace_regs stores the registers related to the context. + * On function entry, registers for function parameters and hooking the + * function call are stored, and on function exit, registers for function + * return value and frame pointers are stored. + * + * And also, it dpends on the context that which registers are restored + * from the ftrace_regs. * On the function entry, those registers will be restored except for * the stack pointer, so that user can change the function parameters * and instruction pointer (e.g. live patching.) @@ -170,6 +176,12 @@ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ +#ifdef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS + +static_assert(sizeof(struct pt_regs) == ftrace_regs_size()); + +#endif /* CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ + static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) { if (!fregs) @@ -178,6 +190,54 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs return arch_ftrace_get_regs(fregs); } +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ + defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) + +static __always_inline struct pt_regs * +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + /* + * If CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS=y, ftrace_regs memory + * layout is including pt_regs. So always returns that address. + * Since arch_ftrace_get_regs() will check some members and may return + * NULL, we can not use it. + */ + return &arch_ftrace_regs(fregs)->regs; +} + +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +/* + * Please define arch dependent pt_regs which compatible to the + * perf_arch_fetch_caller_regs() but based on ftrace_regs. + * This requires + * - user_mode(_regs) returns false (always kernel mode). + * - able to use the _regs for stack trace. 
+ */ +#ifndef arch_ftrace_fill_perf_regs +/* As same as perf_arch_fetch_caller_regs(), do nothing by default */ +#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0) +#endif + +static __always_inline struct pt_regs * +ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + arch_ftrace_fill_perf_regs(fregs, regs); + return regs; +} + +#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ + +static __always_inline struct pt_regs * +ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + return &arch_ftrace_regs(fregs)->regs; +} + +#endif + /* * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs. * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs. @@ -190,6 +250,23 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } +#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API +static __always_inline unsigned long +ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth) +{ + unsigned long *stackp; + + stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs); + if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) == + ((unsigned long)stackp & ~(THREAD_SIZE - 1))) + return *(stackp + nth); + + return 0; +} +#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */ +#define ftrace_regs_get_kernel_stack_nth(fregs, nth) (0L) +#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */ + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -545,6 +622,19 @@ enum { FTRACE_MAY_SLEEP = (1 << 5), }; +/* Arches can override ftrace_get_symaddr() to convert fentry_ip to symaddr. */ +#ifndef ftrace_get_symaddr +/** + * ftrace_get_symaddr - return the symbol address from fentry_ip + * @fentry_ip: the address of ftrace location + * + * Get the symbol address from @fentry_ip (fast path). If there is no fast + * search path, this returns 0. + * User may need to use kallsyms API to find the symbol address. 
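The kernel-doc above says ftrace_get_symaddr() returns 0 when an architecture has no fast lookup path and that callers may fall back to the kallsyms API. A hedged sketch of that fallback follows; example_fentry_to_symaddr() is an illustrative helper, not part of this patch:

#include <linux/kallsyms.h>

/* Illustrative only: resolve the symbol start address for an fentry ip. */
static unsigned long example_fentry_to_symaddr(unsigned long fentry_ip)
{
	unsigned long size, offset;
	unsigned long symaddr = ftrace_get_symaddr(fentry_ip);

	if (symaddr)
		return symaddr;

	/* Slow path: ask kallsyms for the containing symbol and rewind to its start. */
	if (kallsyms_lookup_size_offset(fentry_ip, &size, &offset))
		return fentry_ip - offset;

	return 0;
}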
+ */ +#define ftrace_get_symaddr(fentry_ip) (0) +#endif + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); @@ -1061,20 +1151,21 @@ struct ftrace_graph_ret { int depth; /* Number of functions that overran the depth limit for current task */ unsigned int overrun; - unsigned long long calltime; - unsigned long long rettime; } __packed; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, - struct fgraph_ops *); /* return */ + struct fgraph_ops *, + struct ftrace_regs *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *); /* entry */ + struct fgraph_ops *, + struct ftrace_regs *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops); + struct fgraph_ops *gops, + struct ftrace_regs *fregs); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1114,8 +1205,15 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -function_graph_enter(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp); +function_graph_enter_regs(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp, + struct ftrace_regs *fregs); + +static inline int function_graph_enter(unsigned long ret, unsigned long func, + unsigned long fp, unsigned long *retp) +{ + return function_graph_enter_regs(ret, func, fp, retp, NULL); +} struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index be1ed0c891d0..15627ceea9bc 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -30,7 +30,14 @@ struct ftrace_regs; override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) +#define ftrace_regs_get_frame_pointer(fregs) \ + frame_pointer(&arch_ftrace_regs(fregs)->regs) #endif /* HAVE_ARCH_FTRACE_REGS */ +/* This can be overridden by the architectures */ +#ifndef FTRACE_REGS_MAX_ARGS +# define FTRACE_REGS_MAX_ARGS 6 +#endif + #endif /* _LINUX_FTRACE_REGS_H */ diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h new file mode 100644 index 000000000000..5d61fc8a6871 --- /dev/null +++ b/include/linux/fwctl.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __LINUX_FWCTL_H +#define __LINUX_FWCTL_H +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/cleanup.h> +#include <uapi/fwctl/fwctl.h> + +struct fwctl_device; +struct fwctl_uctx; + +/** + * struct fwctl_ops - Driver provided operations + * + * fwctl_unregister() will wait until all excuting ops are completed before it + * returns. Drivers should be mindful to not let their ops run for too long as + * it will block device hot unplug and module unloading. + */ +struct fwctl_ops { + /** + * @device_type: The drivers assigned device_type number. This is uABI. + */ + enum fwctl_device_type device_type; + /** + * @uctx_size: The size of the fwctl_uctx struct to allocate. The first + * bytes of this memory will be a fwctl_uctx. The driver can use the + * remaining bytes as its private memory. + */ + size_t uctx_size; + /** + * @open_uctx: Called when a file descriptor is opened before the uctx + * is ever used. 
+ */ + int (*open_uctx)(struct fwctl_uctx *uctx); + /** + * @close_uctx: Called when the uctx is destroyed, usually when the FD + * is closed. + */ + void (*close_uctx)(struct fwctl_uctx *uctx); + /** + * @info: Implement FWCTL_INFO. Return a kmalloc() memory that is copied + * to out_device_data. On input length indicates the size of the user + * buffer on output it indicates the size of the memory. The driver can + * ignore length on input, the core code will handle everything. + */ + void *(*info)(struct fwctl_uctx *uctx, size_t *length); + /** + * @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and + * return the response and set out_len. rpc_in can be returned as the + * response pointer. Otherwise the returned pointer is freed with + * kvfree(). + */ + void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *rpc_in, size_t in_len, size_t *out_len); +}; + +/** + * struct fwctl_device - Per-driver registration struct + * @dev: The sysfs (class/fwctl/fwctlXX) device + * + * Each driver instance will have one of these structs with the driver private + * data following immediately after. This struct is refcounted, it is freed by + * calling fwctl_put(). + */ +struct fwctl_device { + struct device dev; + /* private: */ + struct cdev cdev; + + /* Protect uctx_list */ + struct mutex uctx_list_lock; + struct list_head uctx_list; + /* + * Protect ops, held for write when ops becomes NULL during unregister, + * held for read whenever ops is loaded or an ops function is running. + */ + struct rw_semaphore registration_lock; + const struct fwctl_ops *ops; +}; + +struct fwctl_device *_fwctl_alloc_device(struct device *parent, + const struct fwctl_ops *ops, + size_t size); +/** + * fwctl_alloc_device - Allocate a fwctl + * @parent: Physical device that provides the FW interface + * @ops: Driver ops to register + * @drv_struct: 'struct driver_fwctl' that holds the struct fwctl_device + * @member: Name of the struct fwctl_device in @drv_struct + * + * This allocates and initializes the fwctl_device embedded in the drv_struct. + * Upon success the pointer must be freed via fwctl_put(). Returns a 'drv_struct + * \*' on success, NULL on error. + */ +#define fwctl_alloc_device(parent, ops, drv_struct, member) \ + ({ \ + static_assert(__same_type(struct fwctl_device, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_fwctl_alloc_device(parent, ops, \ + sizeof(drv_struct)); \ + }) + +static inline struct fwctl_device *fwctl_get(struct fwctl_device *fwctl) +{ + get_device(&fwctl->dev); + return fwctl; +} +static inline void fwctl_put(struct fwctl_device *fwctl) +{ + put_device(&fwctl->dev); +} +DEFINE_FREE(fwctl, struct fwctl_device *, if (_T) fwctl_put(_T)); + +int fwctl_register(struct fwctl_device *fwctl); +void fwctl_unregister(struct fwctl_device *fwctl); + +/** + * struct fwctl_uctx - Per user FD context + * @fwctl: fwctl instance that owns the context + * + * Every FD opened by userspace will get a unique context allocation. Any driver + * private data will follow immediately after. + */ +struct fwctl_uctx { + struct fwctl_device *fwctl; + /* private: */ + /* Head at fwctl_device::uctx_list */ + struct list_head uctx_list_entry; +}; + +#endif diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0d79070c5a70..097be89487bf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. 
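Putting the pieces of the new fwctl.h introduced just above together, a driver would embed struct fwctl_device as the first member of its own structure, allocate it with fwctl_alloc_device() and publish it with fwctl_register(). The sketch below is a hypothetical driver skeleton under those assumptions; example_fwctl, example_fwctl_ops and example_probe() are made-up names and the ops are left unfilled:

/* Hypothetical driver-side sketch of the fwctl registration flow. */
struct example_fwctl {
	struct fwctl_device fwctl;	/* must be the first member */
	/* driver-private state follows */
};

static const struct fwctl_ops example_fwctl_ops = {
	/* .device_type, .uctx_size, .open_uctx, .close_uctx, .info, .fw_rpc */
};

static int example_probe(struct device *parent)
{
	struct example_fwctl *ef;
	int ret;

	ef = fwctl_alloc_device(parent, &example_fwctl_ops,
				struct example_fwctl, fwctl);
	if (!ef)
		return -ENOMEM;

	ret = fwctl_register(&ef->fwctl);
	if (ret) {
		fwctl_put(&ef->fwctl);
		return ret;
	}
	return 0;
}

On teardown the driver would call fwctl_unregister() followed by fwctl_put(), consistent with the refcounting described in the struct fwctl_device kernel-doc.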
* + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ @@ -91,7 +96,7 @@ struct fwnode_endpoint { #define SWNODE_GRAPH_PORT_NAME_FMT "port@%u" #define SWNODE_GRAPH_ENDPOINT_NAME_FMT "endpoint@%u" -#define NR_FWNODE_REFERENCE_ARGS 8 +#define NR_FWNODE_REFERENCE_ARGS 16 /** * struct fwnode_reference_args - Fwnode reference with additional arguments @@ -112,6 +117,7 @@ struct fwnode_reference_args { * @device_is_available: Return true if the device is available. * @device_get_match_data: Return the device driver match data. * @property_present: Return true if a property is present. + * @property_read_bool: Return a boolean property value. * @property_read_int_array: Read an array of integer properties. Return zero on * success, a negative error code otherwise. * @property_read_string_array: Read an array of string properties. Return zero @@ -141,6 +147,8 @@ struct fwnode_operations { (*device_get_dma_attr)(const struct fwnode_handle *fwnode); bool (*property_present)(const struct fwnode_handle *fwnode, const char *propname); + bool (*property_read_bool)(const struct fwnode_handle *fwnode, + const char *propname); int (*property_read_int_array)(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b0fe9f62d15b..c9fa6309c903 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,6 +39,25 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } +static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags) +{ + /* + * !__GFP_DIRECT_RECLAIM -> direct claim is not allowed. + * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd. + * All GFP_* flags including GFP_NOWAIT use one or both flags. + * try_alloc_pages() is the only API that doesn't specify either flag. + * + * This is stronger than GFP_NOWAIT or GFP_ATOMIC because + * those are guaranteed to never block on a sleeping lock. + * Here we are enforcing that the allocation doesn't ever spin + * on any locks (i.e. only trylocks). There is no high level + * GFP_$FOO flag for this use in try_alloc_pages() as the + * regular page allocator doesn't fully support this + * allocation mode. + */ + return !!(gfp_flags & __GFP_RECLAIM); +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else @@ -212,35 +231,31 @@ struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, - struct list_head *page_list, struct page **page_array); #define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) -unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, +unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); -#define alloc_pages_bulk_array_mempolicy(...) \ - alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__)) +#define alloc_pages_bulk_mempolicy(...) 
\ + alloc_hooks(alloc_pages_bulk_mempolicy_noprof(__VA_ARGS__)) /* Bulk allocate order-0 pages */ -#define alloc_pages_bulk_list(_gfp, _nr_pages, _list) \ - __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL) - -#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array) \ - __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array) +#define alloc_pages_bulk(_gfp, _nr_pages, _page_array) \ + __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _page_array) static inline unsigned long -alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, +alloc_pages_bulk_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array); + return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, page_array); } -#define alloc_pages_bulk_array_node(...) \ - alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__)) +#define alloc_pages_bulk_node(...) \ + alloc_hooks(alloc_pages_bulk_node_noprof(__VA_ARGS__)) static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { @@ -300,8 +315,6 @@ static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order); -struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, - struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); @@ -312,11 +325,6 @@ static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order { return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order); } -static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, - struct mempolicy *mpol, pgoff_t ilx, int nid) -{ - return alloc_pages_noprof(gfp, order); -} static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return __folio_alloc_node_noprof(gfp, order, numa_node_id()); @@ -331,7 +339,6 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) -#define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__)) #define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) #define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__)) #define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) @@ -347,6 +354,9 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) +struct page *try_alloc_pages_noprof(int nid, unsigned int order); +#define try_alloc_pages(...) alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__)) + extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); #define __get_free_pages(...) 
alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) @@ -369,6 +379,7 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); +extern void free_pages_nolock(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); #define __free_page(page) __free_pages((page), 0) diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 6270150f4e29..c1ec62c11ed3 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -91,7 +91,7 @@ static inline int gpio_get_value_cansleep(unsigned gpio) } static inline void gpio_set_value_cansleep(unsigned gpio, int value) { - return gpiod_set_raw_value_cansleep(gpio_to_desc(gpio), value); + gpiod_set_raw_value_cansleep(gpio_to_desc(gpio), value); } static inline int gpio_get_value(unsigned gpio) @@ -100,7 +100,7 @@ static inline int gpio_get_value(unsigned gpio) } static inline void gpio_set_value(unsigned gpio, int value) { - return gpiod_set_raw_value(gpio_to_desc(gpio), value); + gpiod_set_raw_value(gpio_to_desc(gpio), value); } static inline int gpio_to_irq(unsigned gpio) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index db2dfbae8edb..8adc8e9cb4a7 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -3,6 +3,7 @@ #define __LINUX_GPIO_CONSUMER_H #include <linux/bits.h> +#include <linux/err.h> #include <linux/types.h> struct acpi_device; @@ -30,6 +31,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) #define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) +/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. */ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** @@ -110,8 +112,6 @@ int gpiod_get_direction(struct gpio_desc *desc); int gpiod_direction_input(struct gpio_desc *desc); int gpiod_direction_output(struct gpio_desc *desc, int value); int gpiod_direction_output_raw(struct gpio_desc *desc, int value); -int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); -int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); /* Value get/set from non-sleeping context */ int gpiod_get_value(const struct gpio_desc *desc); @@ -119,7 +119,7 @@ int gpiod_get_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_value(struct gpio_desc *desc, int value); +int gpiod_set_value(struct gpio_desc *desc, int value); int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -129,7 +129,7 @@ int gpiod_get_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_raw_value(struct gpio_desc *desc, int value); +int gpiod_set_raw_value(struct gpio_desc *desc, int value); int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -141,7 +141,7 @@ int gpiod_get_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_value_cansleep(struct gpio_desc *desc, int value); +int gpiod_set_value_cansleep(struct gpio_desc *desc, int value); int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ 
-151,7 +151,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); +int gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -183,11 +183,9 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, #else /* CONFIG_GPIOLIB */ -#include <linux/err.h> +#include <linux/bug.h> #include <linux/kernel.h> -#include <asm/bug.h> - static inline int gpiod_count(struct device *dev, const char *con_id) { return 0; @@ -348,18 +346,6 @@ static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value) WARN_ON(desc); return -ENOSYS; } -static inline int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, - unsigned long flags) -{ - WARN_ON(desc); - return -ENOSYS; -} -static inline int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, - unsigned long flags) -{ - WARN_ON(desc); - return -ENOSYS; -} static inline int gpiod_get_value(const struct gpio_desc *desc) { /* GPIO can never have been requested */ @@ -375,10 +361,11 @@ static inline int gpiod_get_array_value(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_value(struct gpio_desc *desc, int value) +static inline int gpiod_set_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -404,10 +391,11 @@ static inline int gpiod_get_raw_array_value(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) +static inline int gpiod_set_raw_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -434,10 +422,11 @@ static inline int gpiod_get_array_value_cansleep(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) +static inline int gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -463,11 +452,12 @@ static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, - int value) +static inline int gpiod_set_raw_value_cansleep(struct gpio_desc *desc, + int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -560,6 +550,31 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, #endif /* CONFIG_GPIOLIB */ +#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_HTE) +int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); +int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); +#else + +#include <linux/bug.h> + +static inline int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, + unsigned long flags) +{ + if 
(!IS_ENABLED(CONFIG_GPIOLIB)) + WARN_ON(desc); + + return -ENOSYS; +} +static inline int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, + unsigned long flags) +{ + if (!IS_ENABLED(CONFIG_GPIOLIB)) + WARN_ON(desc); + + return -ENOSYS; +} +#endif /* CONFIG_GPIOLIB && CONFIG_HTE */ + static inline struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, struct fwnode_handle *fwnode, @@ -608,8 +623,6 @@ int devm_acpi_dev_add_driver_gpios(struct device *dev, #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ -#include <linux/err.h> - static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, const struct acpi_gpio_mapping *gpios) { @@ -635,8 +648,6 @@ void gpiod_unexport(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */ -#include <asm/errno.h> - static inline int gpiod_export(struct gpio_desc *desc, bool direction_may_change) { @@ -655,4 +666,14 @@ static inline void gpiod_unexport(struct gpio_desc *desc) #endif /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */ +static inline int gpiod_multi_set_value_cansleep(struct gpio_descs *descs, + unsigned long *value_bitmap) +{ + if (IS_ERR_OR_NULL(descs)) + return PTR_ERR_OR_ZERO(descs); + + return gpiod_set_array_value_cansleep(descs->ndescs, descs->desc, + descs->info, value_bitmap); +} + #endif diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 2dd7cb9cc270..4c0294a9104d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -14,6 +14,7 @@ #include <linux/property.h> #include <linux/spinlock_types.h> #include <linux/types.h> +#include <linux/util_macros.h> #ifdef CONFIG_GENERIC_MSI_IRQ #include <asm/msi.h> @@ -328,7 +329,8 @@ struct gpio_irq_chip { * @fwnode: optional fwnode providing this controller's properties * @owner: helps prevent removal of modules exporting active GPIOs * @request: optional hook for chip-specific activation, such as - * enabling module power and clock; may sleep + * enabling module power and clock; may sleep; must return 0 on success + * or negative error number on failure * @free: optional hook for chip-specific deactivation, such as * disabling module power and clock; may sleep * @get_direction: returns direction for signal "offset", 0=out, 1=in, @@ -344,10 +346,15 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: assigns output value for signal "offset" - * @set_multiple: assigns output values for multiple signals defined by "mask" + * @set: **DEPRECATED** - please use set_rv() instead + * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead + * @set_rv: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple_rv: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same - * packed config format as generic pinconf. + * packed config format as generic pinconf. Must return 0 on success and + * a negative error number on failure. 
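Since the kernel-doc above now marks .set/.set_multiple as deprecated in favour of the error-returning .set_rv/.set_multiple_rv callbacks, a converted driver might look roughly like the sketch below; struct example_gpio, EXAMPLE_OUT_REG and the regmap-based register write are illustrative assumptions, not part of this patch:

/* Illustrative only: an error-propagating GPIO value setter. */
struct example_gpio {
	struct regmap *regmap;
};

#define EXAMPLE_OUT_REG	0x04	/* hypothetical output data register */

static int example_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
{
	struct example_gpio *priv = gpiochip_get_data(gc);

	/* Bus errors now propagate to gpiod_set_value() callers, which return int in this series. */
	return regmap_update_bits(priv->regmap, EXAMPLE_OUT_REG,
				  BIT(offset), value ? BIT(offset) : 0);
}

static void example_gpio_init(struct gpio_chip *gc)
{
	gc->set_rv = example_gpio_set;	/* instead of the deprecated gc->set */
}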
* @to_irq: optional hook supporting non-static gpiod_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code @@ -394,6 +401,7 @@ struct gpio_irq_chip { * @reg_dir_in: direction in setting register for generic GPIO * @bgpio_dir_unreadable: indicates that the direction register(s) cannot * be read and we need to rely on out internal state tracking. + * @bgpio_pinctrl: the generic GPIO uses a pin control backend. * @bgpio_bits: number of register bits used for a generic GPIO i.e. * <register width> * 8 * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep @@ -441,6 +449,12 @@ struct gpio_chip { void (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); + int (*set_rv)(struct gpio_chip *gc, + unsigned int offset, + int value); + int (*set_multiple_rv)(struct gpio_chip *gc, + unsigned long *mask, + unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); @@ -478,6 +492,7 @@ struct gpio_chip { void __iomem *reg_dir_out; void __iomem *reg_dir_in; bool bgpio_dir_unreadable; + bool bgpio_pinctrl; int bgpio_bits; raw_spinlock_t bgpio_lock; unsigned long bgpio_data; @@ -499,14 +514,6 @@ struct gpio_chip { struct gpio_irq_chip irq; #endif /* CONFIG_GPIOLIB_IRQCHIP */ - /** - * @valid_mask: - * - * If not %NULL, holds bitmask of GPIOs which are valid to be used - * from the chip. - */ - unsigned long *valid_mask; - #if defined(CONFIG_OF_GPIO) /* * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in @@ -516,11 +523,33 @@ struct gpio_chip { /** * @of_gpio_n_cells: * - * Number of cells used to form the GPIO specifier. + * Number of cells used to form the GPIO specifier. The standard is 2 + * cells: + * + * gpios = <&gpio offset flags>; + * + * some complex GPIO controllers instantiate more than one chip per + * device tree node and have 3 cells: + * + * gpios = <&gpio instance offset flags>; + * + * Legacy GPIO controllers may even have 1 cell: + * + * gpios = <&gpio offset>; */ unsigned int of_gpio_n_cells; /** + * @of_node_instance_match: + * + * Determine if a chip is the right instance. Must be implemented by + * any driver using more than one gpio_chip per device tree node. + * Returns true if gc is the instance indicated by i (which is the + * first cell in the phandles for GPIO lines and gpio-ranges). + */ + bool (*of_node_instance_match)(struct gpio_chip *gc, unsigned int i); + + /** * @of_xlate: * * Callback to translate a device tree GPIO specifier into a chip- @@ -550,19 +579,29 @@ DEFINE_CLASS(_gpiochip_for_each_data, const char **label, int *i) /** + * for_each_hwgpio_in_range - Iterates over all GPIOs in a given range + * @_chip: Chip to iterate over. + * @_i: Loop counter. + * @_base: First GPIO in the ranger. + * @_size: Amount of GPIOs to check starting from @base. + * @_label: Place to store the address of the label if the GPIO is requested. + * Set to NULL for unused GPIOs. + */ +#define for_each_hwgpio_in_range(_chip, _i, _base, _size, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + _i < _size; \ + _i++, kfree(_label), _label = NULL) \ + for_each_if(!IS_ERR(_label = gpiochip_dup_line_label(_chip, _base + _i))) + +/** * for_each_hwgpio - Iterates over all GPIOs for given chip. * @_chip: Chip to iterate over. * @_i: Loop counter. * @_label: Place to store the address of the label if the GPIO is requested. * Set to NULL for unused GPIOs. 
*/ -#define for_each_hwgpio(_chip, _i, _label) \ - for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ - *_data.i < _chip->ngpio; \ - (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ - if (IS_ERR(*_data.label = \ - gpiochip_dup_line_label(_chip, *_data.i))) {} \ - else +#define for_each_hwgpio(_chip, _i, _label) \ + for_each_hwgpio_in_range(_chip, _i, 0, _chip->ngpio, _label) /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range @@ -573,13 +612,8 @@ DEFINE_CLASS(_gpiochip_for_each_data, * @_label: label of current GPIO */ #define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \ - for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ - *_data.i < _size; \ - (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ - if ((*_data.label = \ - gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \ - else if (IS_ERR(*_data.label)) {} \ - else + for_each_hwgpio_in_range(_chip, _i, _base, _size, _label) \ + for_each_if(_label) /* Iterates over all requested GPIO of the given @chip */ #define for_each_requested_gpio(chip, i, label) \ @@ -678,6 +712,7 @@ bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); /* Sleep persistence inquiry for drivers */ bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset); +const unsigned long *gpiochip_query_valid_mask(const struct gpio_chip *gc); /* get driver data */ void *gpiochip_get_data(struct gpio_chip *gc); @@ -713,6 +748,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) +#define BGPIOF_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, @@ -865,7 +901,7 @@ static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, #define for_each_gpiochip_node(dev, child) \ device_for_each_child_node(dev, child) \ - if (!fwnode_property_present(child, "gpio-controller")) {} else + for_each_if(fwnode_property_present(child, "gpio-controller")) static inline unsigned int gpiochip_node_count(struct device *dev) { diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index a9f7b7faf57b..c722c67668c6 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -21,7 +21,7 @@ struct regmap; * If not given, the fwnode of the parent is used. * @label: (Optional) Descriptive name for GPIO controller. * If not given, the name of the device is used. - * @ngpio: Number of GPIOs + * @ngpio: (Optional) Number of GPIOs * @names: (Optional) Array of names for gpios * @reg_dat_base: (Optional) (in) register base address * @reg_set_base: (Optional) set register base address @@ -30,7 +30,7 @@ struct regmap; * @reg_dir_out_base: (Optional) out setting register base address * @reg_stride: (Optional) May be set if the registers (of the * same type, dat, set, etc) are not consecutive. 
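/*
 * Illustrative sketch (not part of the patch): a dbg_show() hook built on the
 * for_each_requested_gpio() helper from gpio/driver.h above, which now expands
 * through for_each_hwgpio_in_range(). The function name is made up;
 * <linux/seq_file.h> provides seq_printf().
 */
static void foo_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
{
	const char *label;
	int i;

	for_each_requested_gpio(gc, i, label)
		seq_printf(s, "gpio-%d (%s)\n", gc->base + i, label);
}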
- * @ngpio_per_reg: Number of GPIOs per register + * @ngpio_per_reg: (Optional) Number of GPIOs per register * @irq_domain: (Optional) IRQ domain if the controller is * interrupt-capable * @reg_mask_xlate: (Optional) Translates base address and GPIO diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 455f855bc084..96bda41d9148 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -445,7 +445,6 @@ ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); ssize_t hdmi_infoframe_pack_only(const union hdmi_infoframe *frame, void *buffer, size_t size); -int hdmi_infoframe_check(union hdmi_infoframe *frame); int hdmi_infoframe_unpack(union hdmi_infoframe *frame, const void *buffer, size_t size); void hdmi_infoframe_log(const char *level, struct device *dev, diff --git a/include/linux/hid-over-i2c.h b/include/linux/hid-over-i2c.h new file mode 100644 index 000000000000..3b1a0208a6b8 --- /dev/null +++ b/include/linux/hid-over-i2c.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2024 Intel Corporation */ + +#include <linux/bits.h> + +#ifndef _HID_OVER_I2C_H_ +#define _HID_OVER_I2C_H_ + +#define HIDI2C_REG_LEN sizeof(__le16) + +/* Input report type definition in HIDI2C protocol */ +enum hidi2c_report_type { + HIDI2C_RESERVED = 0, + HIDI2C_INPUT, + HIDI2C_OUTPUT, + HIDI2C_FEATURE, +}; + +/* Power state type definition in HIDI2C protocol */ +enum hidi2c_power_state { + HIDI2C_ON, + HIDI2C_SLEEP, +}; + +/* Opcode type definition in HIDI2C protocol */ +enum hidi2c_opcode { + HIDI2C_RESET = 1, + HIDI2C_GET_REPORT, + HIDI2C_SET_REPORT, + HIDI2C_GET_IDLE, + HIDI2C_SET_IDLE, + HIDI2C_GET_PROTOCOL, + HIDI2C_SET_PROTOCOL, + HIDI2C_SET_POWER, +}; + +/** + * struct hidi2c_report_packet - Report packet definition in HIDI2C protocol + * @len: data field length + * @data: HIDI2C report packet data + */ +struct hidi2c_report_packet { + __le16 len; + u8 data[]; +} __packed; + +#define HIDI2C_LENGTH_LEN sizeof(__le16) + +#define HIDI2C_PACKET_LEN(data_len) ((data_len) + HIDI2C_LENGTH_LEN) +#define HIDI2C_DATA_LEN(pkt_len) ((pkt_len) - HIDI2C_LENGTH_LEN) + +#define HIDI2C_CMD_MAX_RI 0x0F + +/** + * HIDI2C command data packet - Command packet definition in HIDI2C protocol + * @report_id: [0:3] report id (<15) for features or output reports + * @report_type: [4:5] indicate report type, reference to hidi2c_report_type + * @reserved0: [6:7] reserved bits + * @opcode: [8:11] command operation code, reference to hidi2c_opcode + * @reserved1: [12:15] reserved bits + * @report_id_optional: [23:16] appended 3rd byte. + * If the report_id in the low byte is set to the + * sentinel value (HIDI2C_CMD_MAX_RI), then this + * optional third byte represents the report id (>=15) + * Otherwise, not this 3rd byte. + */ + +#define HIDI2C_CMD_LEN sizeof(__le16) +#define HIDI2C_CMD_LEN_OPT (sizeof(__le16) + 1) +#define HIDI2C_CMD_REPORT_ID GENMASK(3, 0) +#define HIDI2C_CMD_REPORT_TYPE GENMASK(5, 4) +#define HIDI2C_CMD_OPCODE GENMASK(11, 8) +#define HIDI2C_CMD_OPCODE GENMASK(11, 8) +#define HIDI2C_CMD_3RD_BYTE GENMASK(23, 16) + +#define HIDI2C_HID_DESC_BCDVERSION 0x100 + +/** + * struct hidi2c_dev_descriptor - HIDI2C device descriptor definition + * @dev_desc_len: The length of the complete device descriptor, fixed to 0x1E (30). + * @bcd_ver: The version number of the HIDI2C protocol supported. + * In binary coded decimal (BCD) format. 
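/*
 * Illustrative sketch (not part of the patch): packing a GET_REPORT command
 * word from the HIDI2C_CMD_* fields defined above. FIELD_PREP() comes from
 * <linux/bitfield.h>; the helper name is made up. Report IDs at or above
 * HIDI2C_CMD_MAX_RI would additionally need the optional third byte.
 */
static __le16 hidi2c_build_get_feature_cmd(u8 report_id)
{
	u16 cmd = FIELD_PREP(HIDI2C_CMD_REPORT_ID, report_id) |
		  FIELD_PREP(HIDI2C_CMD_REPORT_TYPE, HIDI2C_FEATURE) |
		  FIELD_PREP(HIDI2C_CMD_OPCODE, HIDI2C_GET_REPORT);

	return cpu_to_le16(cmd);
}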
+ * @report_desc_len: The length of the report descriptor + * @report_desc_reg: The register address to retrieve report descriptor + * @input_reg: the register address to retrieve input report + * @max_input_len: The length of the largest possible HID input (or feature) report + * @output_reg: the register address to send output report + * @max_output_len: The length of the largest output (or feature) report + * @cmd_reg: the register address to send command + * @data_reg: the register address to send command data + * @vendor_id: Device manufacturers vendor ID + * @product_id: Device unique model/product ID + * @version_id: Device’s unique version + * @reserved0: Reserved and should be 0 + * @reserved1: Reserved and should be 0 + */ +struct hidi2c_dev_descriptor { + __le16 dev_desc_len; + __le16 bcd_ver; + __le16 report_desc_len; + __le16 report_desc_reg; + __le16 input_reg; + __le16 max_input_len; + __le16 output_reg; + __le16 max_output_len; + __le16 cmd_reg; + __le16 data_reg; + __le16 vendor_id; + __le16 product_id; + __le16 version_id; + __le16 reserved0; + __le16 reserved1; +} __packed; + +#define HIDI2C_DEV_DESC_LEN sizeof(struct hidi2c_dev_descriptor) + +#endif /* _HID_OVER_I2C_H_ */ diff --git a/include/linux/hid-over-spi.h b/include/linux/hid-over-spi.h new file mode 100644 index 000000000000..da5a14b5e89b --- /dev/null +++ b/include/linux/hid-over-spi.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2024 Intel Corporation */ + +#ifndef _HID_OVER_SPI_H_ +#define _HID_OVER_SPI_H_ + +#include <linux/bits.h> +#include <linux/types.h> + +/* Input report type definition in HIDSPI protocol */ +enum input_report_type { + INVALID_INPUT_REPORT_TYPE_0 = 0, + DATA = 1, + INVALID_TYPE_2 = 2, + RESET_RESPONSE = 3, + COMMAND_RESPONSE = 4, + GET_FEATURE_RESPONSE = 5, + INVALID_TYPE_6 = 6, + DEVICE_DESCRIPTOR_RESPONSE = 7, + REPORT_DESCRIPTOR_RESPONSE = 8, + SET_FEATURE_RESPONSE = 9, + OUTPUT_REPORT_RESPONSE = 10, + GET_INPUT_REPORT_RESPONSE = 11, + INVALID_INPUT_REPORT_TYPE = 0xF, +}; + +/* Output report type definition in HIDSPI protocol */ +enum output_report_type { + INVALID_OUTPUT_REPORT_TYPE_0 = 0, + DEVICE_DESCRIPTOR = 1, + REPORT_DESCRIPTOR = 2, + SET_FEATURE = 3, + GET_FEATURE = 4, + OUTPUT_REPORT = 5, + GET_INPUT_REPORT = 6, + COMMAND_CONTENT = 7, +}; + +/* Set power command ID for output report */ +#define HIDSPI_SET_POWER_CMD_ID 1 + +/* Power state definition in HIDSPI protocol */ +enum hidspi_power_state { + HIDSPI_ON = 1, + HIDSPI_SLEEP = 2, + HIDSPI_OFF = 3, +}; + +/** + * Input report header definition in HIDSPI protocol + * Report header size is 32bits, it includes: + * protocol_ver: [0:3] Current supported HIDSPI protocol version, must be 0x3 + * reserved0: [4:7] Reserved bits + * input_report_len: [8:21] Input report length in number bytes divided by 4 + * last_frag_flag: [22]Indicate if this packet is last fragment. 
+ * 1 - indicates last fragment + * 0 - indicates additional fragments + * reserved1: [23] Reserved bits + * @sync_const: [24:31] Used to validate input report header, must be 0x5A + */ +#define HIDSPI_INPUT_HEADER_SIZE sizeof(u32) +#define HIDSPI_INPUT_HEADER_VER GENMASK(3, 0) +#define HIDSPI_INPUT_HEADER_REPORT_LEN GENMASK(21, 8) +#define HIDSPI_INPUT_HEADER_LAST_FLAG BIT(22) +#define HIDSPI_INPUT_HEADER_SYNC GENMASK(31, 24) + +/** + * struct input_report_body_header - Input report body header definition in HIDSPI protocol + * @input_report_type: indicate input report type, reference to enum input_report_type + * @content_len: this input report body packet length + * @content_id: indicate this input report's report id + */ +struct input_report_body_header { + u8 input_report_type; + __le16 content_len; + u8 content_id; +} __packed; + +#define HIDSPI_INPUT_BODY_HEADER_SIZE sizeof(struct input_report_body_header) + +/** + * struct input_report_body - Input report body definition in HIDSPI protocol + * @body_hdr: input report body header + * @content: input report body content + */ +struct input_report_body { + struct input_report_body_header body_hdr; + u8 content[]; +} __packed; + +#define HIDSPI_INPUT_BODY_SIZE(content_len) ((content_len) + HIDSPI_INPUT_BODY_HEADER_SIZE) + +/** + * struct output_report_header - Output report header definition in HIDSPI protocol + * @report_type: output report type, reference to enum output_report_type + * @content_len: length of content + * @content_id: 0x00 - descriptors + * report id - Set/Feature feature or Input/Output Reports + * command opcode - for commands + */ +struct output_report_header { + u8 report_type; + __le16 content_len; + u8 content_id; +} __packed; + +#define HIDSPI_OUTPUT_REPORT_HEADER_SIZE sizeof(struct output_report_header) + +/** + * struct output_report - Output report definition in HIDSPI protocol + * @output_hdr: output report header + * @content: output report content + */ +struct output_report { + struct output_report_header output_hdr; + u8 content[]; +} __packed; + +#define HIDSPI_OUTPUT_REPORT_SIZE(content_len) ((content_len) + HIDSPI_OUTPUT_REPORT_HEADER_SIZE) + +/** + * struct hidspi_dev_descriptor - HIDSPI device descriptor definition + * @dev_desc_len: The length of the complete device descriptor, fixed to 0x18 (24). + * @bcd_ver: The version number of the HIDSPI protocol supported. + * In binary coded decimal (BCD) format. Must be fixed to 0x0300. + * @rep_desc_len: The length of the report descriptor + * @max_input_len: The length of the largest possible HID input (or feature) report + * @max_output_len: The length of the largest output (or feature) report + * @max_frag_len: The length of the largest fragment, where a fragment represents + * the body of an input report. 
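/*
 * Illustrative sketch (not part of the patch): validating and unpacking a raw
 * 32-bit input report header using the HIDSPI_INPUT_HEADER_* fields above.
 * FIELD_GET() comes from <linux/bitfield.h>; the function name is made up.
 */
static int hidspi_check_input_header(u32 header, size_t *body_len, bool *last_frag)
{
	if (FIELD_GET(HIDSPI_INPUT_HEADER_SYNC, header) != 0x5a ||
	    FIELD_GET(HIDSPI_INPUT_HEADER_VER, header) != 0x3)
		return -EINVAL;

	/* The length field counts 4-byte units, so scale back to bytes. */
	*body_len = FIELD_GET(HIDSPI_INPUT_HEADER_REPORT_LEN, header) * 4;
	*last_frag = FIELD_GET(HIDSPI_INPUT_HEADER_LAST_FLAG, header);

	return 0;
}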
+ * @vendor_id: Device manufacturers vendor ID + * @product_id: Device unique model/product ID + * @version_id: Device’s unique version + * @flags: Specify flags for the device’s operation + * @reserved: Reserved and should be 0 + */ +struct hidspi_dev_descriptor { + __le16 dev_desc_len; + __le16 bcd_ver; + __le16 rep_desc_len; + __le16 max_input_len; + __le16 max_output_len; + __le16 max_frag_len; + __le16 vendor_id; + __le16 product_id; + __le16 version_id; + __le16 flags; + __le32 reserved; +}; + +#define HIDSPI_DEVICE_DESCRIPTOR_SIZE sizeof(struct hidspi_dev_descriptor) +#define HIDSPI_INPUT_DEVICE_DESCRIPTOR_SIZE \ + (HIDSPI_INPUT_BODY_HEADER_SIZE + HIDSPI_DEVICE_DESCRIPTOR_SIZE) + +#endif /* _HID_OVER_SPI_H_ */ diff --git a/include/linux/hid.h b/include/linux/hid.h index cdc0dc13c87f..daae1d6d11a7 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -81,6 +81,8 @@ struct hid_item { #define HID_MAIN_ITEM_TAG_FEATURE 11 #define HID_MAIN_ITEM_TAG_BEGIN_COLLECTION 10 #define HID_MAIN_ITEM_TAG_END_COLLECTION 12 +#define HID_MAIN_ITEM_TAG_RESERVED_MIN 13 +#define HID_MAIN_ITEM_TAG_RESERVED_MAX 15 /* * HID report descriptor main item contents @@ -738,8 +740,9 @@ struct hid_descriptor { __le16 bcdHID; __u8 bCountryCode; __u8 bNumDescriptors; + struct hid_class_descriptor rpt_desc; - struct hid_class_descriptor desc[1]; + struct hid_class_descriptor opt_descs[]; } __attribute__ ((packed)); #define HID_DEVICE(b, g, ven, prod) \ @@ -1222,12 +1225,6 @@ unsigned long hid_lookup_quirk(const struct hid_device *hdev); int hid_quirks_init(char **quirks_param, __u16 bus, int count); void hid_quirks_exit(__u16 bus); -#ifdef CONFIG_HID_PID -int hid_pidff_init(struct hid_device *hid); -#else -#define hid_pidff_init NULL -#endif - #define dbg_hid(fmt, ...) pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__) #define hid_err(hid, fmt, ...) 
\ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..c698f8415675 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 6dbd0d49628f..99fcf65d575f 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -97,6 +97,8 @@ /* page number for queue file region */ #define QM_DOORBELL_PAGE_NR 1 +#define QM_DEV_ALG_MAX_LEN 256 + /* uacce mode of the driver */ #define UACCE_MODE_NOUACCE 0 /* don't use uacce */ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ @@ -122,6 +124,7 @@ enum qm_hw_ver { QM_HW_V1 = 0x20, QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, + QM_HW_V4 = 0x50, }; enum qm_fun_type { @@ -156,6 +159,7 @@ enum qm_cap_bits { QM_SUPPORT_MB_COMMAND, QM_SUPPORT_SVA_PREFETCH, QM_SUPPORT_RPM, + QM_SUPPORT_DAE, }; struct qm_dev_alg { @@ -266,6 +270,8 @@ struct hisi_qm_err_ini { void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); enum acc_err_result (*get_err_result)(struct hisi_qm *qm); + bool (*dev_is_abnormal)(struct hisi_qm *qm); + int (*set_priv_status)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { @@ -392,6 +398,8 @@ struct hisi_qm { struct mutex mailbox_lock; + struct mutex ifc_lock; + const struct hisi_qm_hw_ops *ops; struct qm_debug debug; diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f7bfdcf0dda3..1ef867bb8c44 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -223,11 +223,14 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) } #endif +static inline enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused) +{ + return HRTIMER_NORESTART; +} + /* Exported timer functions: */ /* Initialize timers: */ -extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart 
(*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_setup_on_stack(struct hrtimer *timer, @@ -333,6 +336,7 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) static inline void hrtimer_update_function(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *)) { +#ifdef CONFIG_PROVE_LOCKING guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock); if (WARN_ON_ONCE(hrtimer_is_queued(timer))) @@ -340,8 +344,8 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; - - timer->function = function; +#endif + ACCESS_PRIVATE(timer, function) = function; } /* Forward a hrtimer so it expires after now: */ diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index ad66a3081735..8fbbb6bdf7a1 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -34,12 +34,12 @@ enum hrtimer_restart { * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. * - * The hrtimer structure must be initialized by hrtimer_init() + * The hrtimer structure must be initialized by hrtimer_setup() */ struct hrtimer { struct timerqueue_node node; ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); + enum hrtimer_restart (*__private function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; u8 is_rel; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b94c2e8ee918..e893d546a49f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, + bool write); +vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, + bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_UNSUPPORTED, @@ -121,6 +125,8 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, MTHP_STAT_ZSWPOUT, MTHP_STAT_SWPIN, + MTHP_STAT_SWPIN_FALLBACK, + MTHP_STAT_SWPIN_FALLBACK_CHARGE, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, MTHP_STAT_SHMEM_ALLOC, @@ -339,6 +345,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); int min_order_for_split(struct folio *folio); int split_folio_to_list(struct folio *folio, struct list_head *list); +bool uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +int folio_split(struct folio *folio, unsigned int new_order, struct page *page, + struct list_head *list); +/* + * try_folio_split - try to split a @folio at @page using non uniform split. + * @folio: folio to be split + * @page: split to order-0 at the given page + * @list: store the after-split folios + * + * Try to split a @folio at @page using non uniform split to order-0, if + * non uniform split is not supported, fall back to uniform split. + * + * Return: 0: split is successful, otherwise split failed. 
+ */ +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + if (!non_uniform_split_supported(folio, 0, false)) + return split_huge_page_to_list_to_order(&folio->page, list, + ret); + return folio_split(folio, ret, page, list); +} static inline int split_huge_page(struct page *page) { struct folio *folio = page_folio(page); @@ -402,7 +438,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, - unsigned long end, long adjust_next); + unsigned long end, struct vm_area_struct *next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); @@ -531,6 +567,12 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis return 0; } +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + return 0; +} + static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) @@ -569,7 +611,7 @@ static inline int madvise_collapse(struct vm_area_struct *vma, static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, - long adjust_next) + struct vm_area_struct *next) { } static inline int is_swap_pmd(pmd_t pmd) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0385103dffcd..9a93e6dbb95e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -153,11 +153,11 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -bool isolate_hugetlb(struct folio *folio, struct list_head *list); +bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -void folio_putback_active_hugetlb(struct folio *folio); +void folio_putback_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; @@ -174,6 +174,9 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages[MAX_NUMNODES]; +void hugetlb_bootmem_alloc(void); +bool hugetlb_bootmem_allocated(void); + /* arch callbacks */ #ifndef CONFIG_HIGHPTE @@ -272,6 +275,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); #else /* !CONFIG_HUGETLB_PAGE */ @@ -414,7 +419,7 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) +static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list) { return false; } @@ 
-430,7 +435,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, return 0; } -static inline void folio_putback_active_hugetlb(struct folio *folio) +static inline void folio_putback_hugetlb(struct folio *folio) { } @@ -465,6 +470,12 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write @@ -588,6 +599,7 @@ enum hugetlb_page_flags { HPG_freed, HPG_vmemmap_optimized, HPG_raw_hwp_unreliable, + HPG_cma, __NR_HPAGEFLAGS, }; @@ -647,6 +659,7 @@ HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) +HPAGEFLAG(Cma, cma) #ifdef CONFIG_HUGETLB_PAGE @@ -675,14 +688,26 @@ struct hstate { char name[HSTATE_NAME_LEN]; }; +struct cma; + struct huge_bootmem_page { struct list_head list; struct hstate *hstate; + unsigned long flags; + struct cma *cma; }; +#define HUGE_BOOTMEM_HVO 0x0001 +#define HUGE_BOOTMEM_ZONES_VALID 0x0002 +#define HUGE_BOOTMEM_CMA 0x0004 + +bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); + int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); +int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); +void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, - unsigned long addr, int avoid_reserve); + unsigned long addr, bool cow_from_owner); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); @@ -813,6 +838,17 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif +#ifndef arch_has_huge_bootmem_alloc +/* + * Some architectures do their own bootmem allocation, so they can't use + * early CMA allocation. 
+ */ +static inline bool arch_has_huge_bootmem_alloc(void) +{ + return false; +} +#endif + static inline struct hstate *folio_hstate(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); @@ -1061,9 +1097,19 @@ static inline int isolate_or_dissolve_huge_page(struct page *page, return -ENOMEM; } +static inline int replace_free_hugepage_folios(unsigned long start_pfn, + unsigned long end_pfn) +{ + return 0; +} + +static inline void wait_for_freed_hugetlb_folios(void) +{ +} + static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, - int avoid_reserve) + bool cow_from_owner) { return NULL; } @@ -1245,6 +1291,15 @@ static inline bool hugetlbfs_pagecache_present( { return false; } + +static inline void hugetlb_bootmem_alloc(void) +{ +} + +static inline bool hugetlb_bootmem_allocated(void) +{ + return false; +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index f0231dbc4777..f35b42e8c5de 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -58,11 +58,9 @@ struct hwspinlock_pdata { int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev, const struct hwspinlock_ops *ops, int base_id, int num_locks); int hwspin_lock_unregister(struct hwspinlock_device *bank); -struct hwspinlock *hwspin_lock_request(void); struct hwspinlock *hwspin_lock_request_specific(unsigned int id); int hwspin_lock_free(struct hwspinlock *hwlock); int of_hwspin_lock_get_id(struct device_node *np, int index); -int hwspin_lock_get_id(struct hwspinlock *hwlock); int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, unsigned long *); int __hwspin_trylock(struct hwspinlock *, int, unsigned long *); @@ -70,7 +68,6 @@ void __hwspin_unlock(struct hwspinlock *, int, unsigned long *); int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name); int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id); int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock); -struct hwspinlock *devm_hwspin_lock_request(struct device *dev); struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev, unsigned int id); int devm_hwspin_lock_unregister(struct device *dev, @@ -95,11 +92,6 @@ int devm_hwspin_lock_register(struct device *dev, * Note: ERR_PTR(-ENODEV) will still be considered a success for NULL-checking * users. Others, which care, can still check this with IS_ERR. 
*/ -static inline struct hwspinlock *hwspin_lock_request(void) -{ - return ERR_PTR(-ENODEV); -} - static inline struct hwspinlock *hwspin_lock_request_specific(unsigned int id) { return ERR_PTR(-ENODEV); @@ -138,11 +130,6 @@ static inline int of_hwspin_lock_get_id(struct device_node *np, int index) return 0; } -static inline int hwspin_lock_get_id(struct hwspinlock *hwlock) -{ - return 0; -} - static inline int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name) { @@ -155,11 +142,6 @@ int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock) return 0; } -static inline struct hwspinlock *devm_hwspin_lock_request(struct device *dev) -{ - return ERR_PTR(-ENODEV); -} - static inline struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev, unsigned int id) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 02a226bcf0ed..b52ac40d5830 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -24,7 +24,7 @@ #include <linux/mod_devicetable.h> #include <linux/interrupt.h> #include <linux/reciprocal_div.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #define MAX_PAGE_BUFFER_COUNT 32 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */ @@ -371,19 +371,6 @@ struct vmtransfer_page_packet_header { struct vmtransfer_page_range ranges[]; } __packed; -struct vmgpadl_packet_header { - struct vmpacket_descriptor d; - u32 gpadl; - u32 reserved; -} __packed; - -struct vmadd_remove_transfer_page_set { - struct vmpacket_descriptor d; - u32 gpadl; - u16 xfer_pageset_id; - u16 reserved; -} __packed; - /* * This structure defines a range in guest physical space that can be made to * look virtually contiguous. @@ -395,30 +382,6 @@ struct gpa_range { }; /* - * This is the format for an Establish Gpadl packet, which contains a handle by - * which this GPADL will be known and a set of GPA ranges associated with it. - * This can be converted to a MDL by the guest OS. If there are multiple GPA - * ranges, then the resulting MDL will be "chained," representing multiple VA - * ranges. - */ -struct vmestablish_gpadl { - struct vmpacket_descriptor d; - u32 gpadl; - u32 range_cnt; - struct gpa_range range[1]; -} __packed; - -/* - * This is the format for a Teardown Gpadl packet, which indicates that the - * GPADL handle in the Establish Gpadl packet will never be referenced again. - */ -struct vmteardown_gpadl { - struct vmpacket_descriptor d; - u32 gpadl; - u32 reserved; /* for alignment to a 8-byte boundary */ -} __packed; - -/* * This is the format for a GPA-Direct packet, which contains a set of GPA * ranges, in addition to commands and/or data. */ @@ -429,25 +392,6 @@ struct vmdata_gpa_direct { struct gpa_range range[1]; } __packed; -/* This is the format for a Additional Data Packet. 
*/ -struct vmadditional_data { - struct vmpacket_descriptor d; - u64 total_bytes; - u32 offset; - u32 byte_cnt; - unsigned char data[1]; -} __packed; - -union vmpacket_largest_possible_header { - struct vmpacket_descriptor simple_hdr; - struct vmtransfer_page_packet_header xfer_page_hdr; - struct vmgpadl_packet_header gpadl_hdr; - struct vmadd_remove_transfer_page_set add_rm_xfer_page_hdr; - struct vmestablish_gpadl establish_gpadl_hdr; - struct vmteardown_gpadl teardown_gpadl_hdr; - struct vmdata_gpa_direct data_gpa_direct_hdr; -}; - #define VMPACKET_DATA_START_ADDRESS(__packet) \ (void *)(((unsigned char *)__packet) + \ ((struct vmpacket_descriptor)__packet)->offset8 * 8) @@ -768,15 +712,6 @@ struct vmbus_close_msg { struct vmbus_channel_close_channel msg; }; -/* Define connection identifier type. */ -union hv_connection_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -1067,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; #define lock_requestor(channel, flags) \ @@ -1226,13 +1167,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, @@ -1670,6 +1604,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, const guid_t *shv_host_servie_id); int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp); void vmbus_set_event(struct vmbus_channel *channel); +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu); /* Get the start of the ring buffer. */ static inline void * diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 388ce71a29a9..2e4903b7f7bc 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -321,6 +321,8 @@ struct i2c_driver { * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources * acquired when probing this device. + * @debugfs: pointer to the debugfs subdirectory which the I2C core created + * for this client. * * An i2c_client identifies a single device (i.e. chip) connected to an * i2c bus. 
The behaviour exposed to Linux is defined by the driver @@ -350,6 +352,7 @@ struct i2c_client { i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif void *devres_group_id; /* ID of probe devres group */ + struct dentry *debugfs; /* per-client debugfs dir */ }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) @@ -949,6 +952,21 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) return (msg->addr << 1) | (msg->flags & I2C_M_RD); } +/* + * 10-bit address + * addr_1: 5'b11110 | addr[9:8] | (R/nW) + * addr_2: addr[7:0] + */ +static inline u8 i2c_10bit_addr_hi_from_msg(const struct i2c_msg *msg) +{ + return 0xf0 | ((msg->addr & GENMASK(9, 8)) >> 7) | (msg->flags & I2C_M_RD); +} + +static inline u8 i2c_10bit_addr_lo_from_msg(const struct i2c_msg *msg) +{ + return msg->addr & GENMASK(7, 0); +} + u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); @@ -1026,10 +1044,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node return i2c_get_adapter_by_fwnode(of_fwnode_handle(node)); } -const struct of_device_id -*i2c_of_match_device(const struct of_device_id *matches, - struct i2c_client *client); - int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info); @@ -1050,13 +1064,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node return NULL; } -static inline const struct of_device_id -*i2c_of_match_device(const struct of_device_id *matches, - struct i2c_client *client) -{ - return NULL; -} - static inline int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 0a8a44ac2f02..b674f64d0822 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -283,7 +283,7 @@ static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, * module_i3c_i2c_driver() - Register a module providing an I3C and an I2C * driver * @__i3cdrv: the I3C driver to register - * @__i2cdrv: the I3C driver to register + * @__i2cdrv: the I2C driver to register * * Provide generic init/exit functions that simply register/unregister an I3C * and an I2C driver. diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 12d532b012c5..c67922ece617 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -475,7 +475,7 @@ struct i3c_master_controller_ops { int (*attach_i2c_dev)(struct i2c_dev_desc *dev); void (*detach_i2c_dev)(struct i2c_dev_desc *dev); int (*i2c_xfers)(struct i2c_dev_desc *dev, - const struct i2c_msg *xfers, int nxfers); + struct i2c_msg *xfers, int nxfers); int (*request_ibi)(struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void (*free_ibi)(struct i3c_dev_desc *dev); diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 95b07f8b77fe..00037c13abc8 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -54,15 +54,29 @@ struct serio; +/** + * typedef i8042_filter_t - i8042 filter callback + * @data: Data received by the i8042 controller + * @str: Status register of the i8042 controller + * @serio: Serio of the i8042 controller + * @context: Context pointer associated with this callback + * + * This represents a i8042 filter callback which can be used with i8042_install_filter() + * and i8042_remove_filter() to filter the i8042 input for platform-specific key codes. 
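/*
 * Illustrative sketch (not part of the patch): a platform driver registering
 * an i8042 filter with the new context-aware prototype above. The scancode,
 * the "foo" driver data type and its work item are hypothetical.
 */
static bool foo_i8042_filter(unsigned char data, unsigned char str,
			     struct serio *serio, void *context)
{
	struct foo_drvdata *priv = context;

	if (data != 0xe9)	/* hypothetical vendor hotkey scancode */
		return false;	/* let the normal input path handle it */

	schedule_work(&priv->hotkey_work);
	return true;		/* consume the byte */
}
/*
 * At probe:  err = i8042_install_filter(foo_i8042_filter, priv);
 * At remove: i8042_remove_filter(foo_i8042_filter);
 */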
+ * + * Context: Interrupt context. + * Returns: true if the data should be filtered out, false if otherwise. + */ +typedef bool (*i8042_filter_t)(unsigned char data, unsigned char str, struct serio *serio, + void *context); + #if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE) void i8042_lock_chip(void); void i8042_unlock_chip(void); int i8042_command(unsigned char *param, int command); -int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)); -int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)); +int i8042_install_filter(i8042_filter_t filter, void *context); +int i8042_remove_filter(i8042_filter_t filter); #else @@ -79,14 +93,12 @@ static inline int i8042_command(unsigned char *param, int command) return -ENODEV; } -static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)) +static inline int i8042_install_filter(i8042_filter_t filter, void *context) { return -ENODEV; } -static inline int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)) +static inline int i8042_remove_filter(i8042_filter_t filter) { return -ENODEV; } diff --git a/include/linux/idr.h b/include/linux/idr.h index da5f5fa4a3a6..2267902d29a7 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,6 +15,7 @@ #include <linux/radix-tree.h> #include <linux/gfp.h> #include <linux/percpu.h> +#include <linux/cleanup.h> struct idr { struct radix_tree_root idr_rt; @@ -124,6 +125,22 @@ void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); +struct __class_idr { + struct idr *idr; + int id; +}; + +#define idr_null ((struct __class_idr){ NULL, -1 }) +#define take_idr_id(id) __get_and_null(id, idr_null) + +DEFINE_CLASS(idr_alloc, struct __class_idr, + if (_T.id >= 0) idr_remove(_T.idr, _T.id), + ((struct __class_idr){ + .idr = idr, + .id = idr_alloc(idr, ptr, start, end, gfp), + }), + struct idr *idr, void *ptr, int start, int end, gfp_t gfp); + /** * idr_init_base() - Initialise an IDR. * @idr: IDR handle. @@ -257,6 +274,7 @@ struct ida { int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max); /** * ida_alloc() - Allocate an unused ID. 
@@ -328,4 +346,14 @@ static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } + +static inline bool ida_exists(struct ida *ida, unsigned int id) +{ + return ida_find_first_range(ida, id, id) == id; +} + +static inline int ida_find_first(struct ida *ida) +{ + return ida_find_first_range(ida, 0, ~0); +} #endif /* __IDR_H__ */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a96db2915aab..7edc3fb0641c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -111,6 +111,8 @@ /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 +#define IEEE80211_S1G_BCN_CSSID 0x200 +#define IEEE80211_S1G_BCN_ANO 0x400 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 @@ -153,9 +155,6 @@ #define IEEE80211_ANO_NETTYPE_WILD 15 -/* bits unique to S1G beacon */ -#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 - /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 @@ -628,6 +627,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) } /** + * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * next TBTT field + */ +static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); +} + +/** + * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * ANO field + */ +static inline bool ieee80211_s1g_has_ano(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO)); +} + +/** + * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * compressed SSID field + */ +static inline bool ieee80211_s1g_has_cssid(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); +} + +/** * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an S1G short beacon, @@ -1245,16 +1280,40 @@ struct ieee80211_ext { u8 change_seq; u8 variable[0]; } __packed s1g_beacon; - struct { - u8 sa[ETH_ALEN]; - __le32 timestamp; - u8 change_seq; - u8 next_tbtt[3]; - u8 variable[0]; - } __packed s1g_short_beacon; } u; } __packed __aligned(2); +/** + * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields + * @fc: frame control bytes in little-endian byteorder + * Return: total length in bytes of the optional fixed-length fields + * + * S1G beacons may contain up to three optional fixed-length fields that + * precede the variable-length elements. Whether these fields are present + * is indicated by flags in the frame control field. 
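/*
 * Illustrative sketch (not part of the patch): exercising the ida_exists()
 * and ida_find_first() helpers added to <linux/idr.h> above. The IDA and the
 * function name are local examples.
 */
static DEFINE_IDA(foo_ida);

static void foo_ida_demo(void)
{
	int id = ida_alloc(&foo_ida, GFP_KERNEL);

	if (id < 0)
		return;

	WARN_ON(!ida_exists(&foo_ida, id));	/* just allocated, must exist */
	pr_info("lowest allocated id: %d\n", ida_find_first(&foo_ida));
	ida_free(&foo_ida, id);
}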
+ * + * From IEEE 802.11-2024 section 9.3.4.3: + * - Next TBTT field may be 0 or 3 bytes + * - Short SSID field may be 0 or 4 bytes + * - Access Network Options (ANO) field may be 0 or 1 byte + */ +static inline size_t +ieee80211_s1g_optional_len(__le16 fc) +{ + size_t len = 0; + + if (ieee80211_s1g_has_next_tbtt(fc)) + len += 3; + + if (ieee80211_s1g_has_cssid(fc)) + len += 4; + + if (ieee80211_s1g_has_ano(fc)) + len += 1; + + return len; +} + #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) @@ -1526,12 +1585,27 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { u8 action_code; } __packed ttlm_tear_down; + struct { + u8 action_code; + u8 dialog_token; + u8 variable[]; + } __packed ml_reconf_req; + struct { + u8 action_code; + u8 dialog_token; + u8 count; + u8 variable[]; + } __packed ml_reconf_resp; + struct { + u8 action_code; + u8 variable[]; + } __packed epcs; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ @@ -1542,11 +1616,13 @@ struct ieee80211_mgmt { #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 #define BSS_MEMBERSHIP_SELECTOR_GLK 125 -#define BSS_MEMBERSHIP_SELECTOR_EPS 124 +#define BSS_MEMBERSHIP_SELECTOR_EPD 124 #define BSS_MEMBERSHIP_SELECTOR_SAE_H2E 123 #define BSS_MEMBERSHIP_SELECTOR_HE_PHY 122 #define BSS_MEMBERSHIP_SELECTOR_EHT_PHY 121 +#define BSS_MEMBERSHIP_SELECTOR_MIN BSS_MEMBERSHIP_SELECTOR_EHT_PHY + /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) @@ -3096,6 +3172,11 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454 2 #define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 +#define IEEE80211_EHT_MAC_CAP1_EHT_TRS 0x02 +#define IEEE80211_EHT_MAC_CAP1_TXOP_RET 0x04 +#define IEEE80211_EHT_MAC_CAP1_TWO_BQRS 0x08 +#define IEEE80211_EHT_MAC_CAP1_EHT_LINK_ADAPT_MASK 0x30 +#define IEEE80211_EHT_MAC_CAP1_UNSOL_EPCS_PRIO_ACCESS 0x40 /* EHT PHY capabilities as defined in P802.11be_D2.0 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 @@ -3883,6 +3964,16 @@ enum ieee80211_protected_eht_actioncode { WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ = 3, + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP = 4, + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN = 5, + WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF = 6, + WLAN_PROTECTED_EHT_ACTION_LINK_RECOMMEND = 7, + WLAN_PROTECTED_EHT_ACTION_ML_OP_UPDATE_REQ = 8, + WLAN_PROTECTED_EHT_ACTION_ML_OP_UPDATE_RESP = 9, + WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_NOTIF = 10, + WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ = 11, + WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP = 12, }; /* Security key length */ @@ -4961,6 +5052,7 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLC_BASIC_PRES_EML_CAPA 0x0080 #define IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP 0x0100 #define IEEE80211_MLC_BASIC_PRES_MLD_ID 0x0200 +#define IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP 0x0400 #define IEEE80211_MED_SYNC_DELAY_DURATION 0x00ff #define IEEE80211_MED_SYNC_DELAY_SYNC_OFDM_ED_THRESH 0x0f00 @@ -5018,6 +5110,8 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 
0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 +#define IEEE80211_MLD_CAP_OP_LINK_RECONF_SUPPORT 0x2000 +#define IEEE80211_MLD_CAP_OP_ALIGNED_TWT_SUPPORT 0x4000 struct ieee80211_mle_basic_common_info { u8 len; @@ -5033,6 +5127,9 @@ struct ieee80211_mle_preq_common_info { } __packed; #define IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR 0x0010 +#define IEEE80211_MLC_RECONF_PRES_EML_CAPA 0x0020 +#define IEEE80211_MLC_RECONF_PRES_MLD_CAPA_OP 0x0040 +#define IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP 0x0080 /* no fixed fields in RECONF */ @@ -5223,6 +5320,47 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) } /** + * ieee80211_mle_get_ext_mld_capa_op - returns the extended MLD capabilities + * and operations. + * @data: pointer to the multi-link element + * Return: the extended MLD capabilities and operations field value from + * the multi-link element, or 0 if not present + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + */ +static inline u16 ieee80211_mle_get_ext_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) + common += 1; + + return get_unaligned_le16(common); +} + +/** * ieee80211_mle_get_mld_id - returns the MLD ID * @data: pointer to the multi-link element * Return: The MLD ID in the given multi-link element, or 0 if not present @@ -5294,6 +5432,8 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP) + common += 2; break; case IEEE80211_ML_CONTROL_TYPE_PREQ: common += sizeof(struct ieee80211_mle_preq_common_info); @@ -5304,6 +5444,12 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; + if (control & IEEE80211_MLC_RECONF_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_RECONF_PRES_MLD_CAPA_OP) + common += 2; + if (control & IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP) + common += 2; break; case IEEE80211_ML_CONTROL_TYPE_TDLS: common += sizeof(struct ieee80211_mle_tdls_common_info); @@ -5453,8 +5599,13 @@ ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta #define IEEE80211_MLE_STA_RECONF_CONTROL_COMPLETE_PROFILE 0x0010 #define IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 #define IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT 0x0040 -#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_UPDATE_TYPE 0x0780 -#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT 0x0800 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE 0x0780 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_AP_REM 0 
+#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_OP_PARAM_UPDATE 1 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_ADD_LINK 2 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_DEL_LINK 3 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_NSTR_STATUS 4 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT 0x0800 /** * ieee80211_mle_reconf_sta_prof_size_ok - validate reconfiguration multi-link @@ -5487,6 +5638,9 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +#define IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID 0x000f +#define IEEE80211_EPCS_ENA_RESP_BODY_LEN 3 + static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) { const struct ieee80211_ttlm_elem *t2l = (const void *)data; diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3ff96ae31bf6..c5fe3b2a53e8 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -65,11 +65,9 @@ struct br_ip_list { #define BR_DEFAULT_AGEING_TIME (300 * HZ) struct net_bridge; -void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +void brioctl_set(int (*hook)(struct net *net, unsigned int cmd, void __user *uarg)); -int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg); +int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg); #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 8a9792a6427a..61b7335aa037 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -19,6 +19,9 @@ #include <linux/skbuff.h> #include <uapi/linux/if_ether.h> +/* XX:XX:XX:XX:XX:XX */ +#define MAC_ADDR_STR_LEN (3 * ETH_ALEN - 1) + static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_mac_header(skb); diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 0404f5bf4f30..d7941fd88032 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -13,6 +13,15 @@ enum hsr_version { PRP_V1, }; +enum hsr_port_type { + HSR_PT_NONE = 0, /* Must be 0, used by framereg */ + HSR_PT_SLAVE_A, + HSR_PT_SLAVE_B, + HSR_PT_INTERLINK, + HSR_PT_MASTER, + HSR_PT_PORTS, /* This must be the last item in the enum */ +}; + /* HSR Tag. * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, * path, LSDU_size, sequence Nr }. 
But we let eth_header() create { h_dest, @@ -32,6 +41,8 @@ struct hsr_tag { #if IS_ENABLED(CONFIG_HSR) extern bool is_hsr_master(struct net_device *dev); extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); +struct net_device *hsr_get_port_ndev(struct net_device *ndev, + enum hsr_port_type pt); #else static inline bool is_hsr_master(struct net_device *dev) { @@ -42,6 +53,12 @@ static inline int hsr_get_version(struct net_device *dev, { return -EINVAL; } + +static inline struct net_device *hsr_get_port_ndev(struct net_device *ndev, + enum hsr_port_type pt) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_HSR */ #endif /*_LINUX_IF_HSR_H_*/ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 523025106a64..0f7281e3e448 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -59,8 +59,10 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, extern void macvlan_common_setup(struct net_device *dev); -extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +struct rtnl_newlink_params; + +extern int macvlan_common_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack); extern void macvlan_dellink(struct net_device *dev, struct list_head *head); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d65b5d71b93b..38456b42cdb5 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -176,6 +176,7 @@ struct netpoll; * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry * @vlan_pcpu_stats: ptr to percpu rx stats + * @netpoll: netpoll instance "propagated" down to @real_dev */ struct vlan_dev_priv { unsigned int nr_ingress_mappings; @@ -310,7 +311,7 @@ static inline bool vlan_uses_dev(const struct net_device *dev) * eth_type_vlan - check for valid vlan ether type. * @ethertype: ether type to check * - * Returns true if the ether type is a vlan ether type. + * Returns: true if the ether type is a vlan ether type. */ static inline bool eth_type_vlan(__be16 ethertype) { @@ -341,9 +342,9 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, @@ -390,9 +391,9 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) @@ -414,6 +415,8 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * doesn't have to worry about freeing the original skb. * * Does not change skb->protocol so this function can be used during receive. + * + * Return: modified @skb on success, NULL on error (@skb is freed). 
*/ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, @@ -443,6 +446,8 @@ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, * doesn't have to worry about freeing the original skb. * * Does not change skb->protocol so this function can be used during receive. + * + * Return: modified @skb on success, NULL on error (@skb is freed). */ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) @@ -461,6 +466,8 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. + * + * Return: modified @skb on success, NULL on error (@skb is freed). */ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, __be16 vlan_proto, @@ -533,7 +540,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not of VLAN type + * Returns: error if the skb is not of VLAN type */ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -551,7 +558,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if @skb->vlan_tci is not set correctly + * Returns: error if @skb->vlan_tci is not set correctly */ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) @@ -570,7 +577,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not VLAN tagged + * Returns: error if the skb is not VLAN tagged */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -582,13 +589,13 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) } /** - * vlan_get_protocol - get protocol EtherType. + * __vlan_get_protocol_offset() - get protocol EtherType. * @skb: skbuff to query * @type: first vlan protocol * @mac_offset: MAC offset * @depth: buffer to store length of eth and vlan tags in bytes * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 __vlan_get_protocol_offset(const struct sk_buff *skb, @@ -639,7 +646,7 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) @@ -720,7 +727,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. * - * Returns a new pointer to skb->data, or NULL on failure to pull. + * Returns: a new pointer to skb->data, or NULL on failure to pull. */ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { @@ -737,7 +744,7 @@ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) * skb_vlan_tagged - check if skb is vlan tagged. 
* @skb: skbuff to query * - * Returns true if the skb is tagged, regardless of whether it is hardware + * Returns: true if the skb is tagged, regardless of whether it is hardware * accelerated or not. */ static inline bool skb_vlan_tagged(const struct sk_buff *skb) @@ -753,7 +760,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb) * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. * @skb: skbuff to query * - * Returns true if the skb is tagged with multiple vlan headers, regardless + * Returns: true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. */ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) @@ -784,7 +791,7 @@ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) * @skb: skbuff to query * @features: features to be checked * - * Returns features without unsafe ones if the skb has multiple tags. + * Returns: features without unsafe ones if the skb has multiple tags. */ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) @@ -808,9 +815,11 @@ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, * @h1: Pointer to vlan header * @h2: Pointer to vlan header * - * Compare two vlan headers, returns 0 if equal. + * Compare two vlan headers. * * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits. + * + * Return: 0 if equal, arbitrary non-zero value if not equal. */ static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1, const struct vlan_hdr *h2) diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 5171231f70a8..073b30a9b850 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -87,6 +87,8 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; + unsigned long mca_cstamp; + unsigned long mca_tstamp; struct rcu_head rcu; }; diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index f8c1d2505940..f242b285081b 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -29,6 +29,7 @@ struct ad_sd_calib_data { struct ad_sigma_delta; struct device; +struct gpio_desc; struct iio_dev; /** @@ -45,6 +46,7 @@ struct iio_dev; * modify or drop the sample data, it, may be NULL. * @has_registers: true if the device has writable and readable registers, false * if there is just one read-only sample data shift register. + * @has_named_irqs: Set to true if there is more than one IRQ line. * @addr_shift: Shift of the register address in the communications register. * @read_mask: Mask for the communications register having the read bit set. * @status_ch_mask: Mask for the channel number stored in status register. @@ -52,7 +54,7 @@ struct iio_dev; * be used. * @irq_flags: flags for the interrupt used by the triggered buffer * @num_slots: Number of sequencer slots - * @irq_line: IRQ for reading conversions. If 0, spi->irq will be used + * @num_resetclks: Number of SPI clk cycles with MOSI=1 to reset the chip. 
*/ struct ad_sigma_delta_info { int (*set_channel)(struct ad_sigma_delta *, unsigned int channel); @@ -62,13 +64,14 @@ struct ad_sigma_delta_info { int (*disable_one)(struct ad_sigma_delta *, unsigned int chan); int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample); bool has_registers; + bool has_named_irqs; unsigned int addr_shift; unsigned int read_mask; unsigned int status_ch_mask; unsigned int data_reg; unsigned long irq_flags; unsigned int num_slots; - int irq_line; + unsigned int num_resetclks; }; /** @@ -85,6 +88,7 @@ struct ad_sigma_delta { /* private: */ struct completion completion; + spinlock_t irq_lock; /* protects .irq_dis and irq en/disable state */ bool irq_dis; bool bus_locked; @@ -96,7 +100,8 @@ struct ad_sigma_delta { unsigned int active_slots; unsigned int current_slot; unsigned int num_slots; - int irq_line; + struct gpio_desc *rdy_gpiod; + int irq_line; bool status_appended; /* map slots to channels in order to know what to expect from devices */ unsigned int *slots; @@ -178,8 +183,7 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int *val); -int ad_sd_reset(struct ad_sigma_delta *sigma_delta, - unsigned int reset_length); +int ad_sd_reset(struct ad_sigma_delta *sigma_delta); int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 10be00f3b120..e45b7dfbec35 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -70,6 +70,12 @@ enum iio_backend_sample_trigger { IIO_BACKEND_SAMPLE_TRIGGER_MAX }; +enum iio_backend_interface_type { + IIO_BACKEND_INTERFACE_SERIAL_LVDS, + IIO_BACKEND_INTERFACE_SERIAL_CMOS, + IIO_BACKEND_INTERFACE_MAX +}; + /** * struct iio_backend_ops - operations structure for an iio_backend * @enable: Enable backend. @@ -88,6 +94,9 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @interface_type_get: Interface type. + * @data_size_set: Data size. + * @oversampling_ratio_set: Set Oversampling ratio. * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. 
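The new interface_type_get/data_size_set/oversampling_ratio_set callbacks are exposed to front-end converter drivers through the iio_backend_*() helpers declared further down in this header. A minimal, hypothetical probe-time sketch follows; the example_adc_* name and the 16-bit vs. 20-bit choice are illustrative only and not taken from any in-tree driver:

#include <linux/iio/backend.h>

static int example_adc_configure_backend(struct iio_backend *back)
{
	enum iio_backend_interface_type itype;
	int ret;

	/* Ask the backend (e.g. an HDL capture core) how the data lanes are wired. */
	ret = iio_backend_interface_type_get(back, &itype);
	if (ret)
		return ret;

	/* Illustrative assumption: CMOS wiring carries 16-bit samples, LVDS 20-bit. */
	ret = iio_backend_data_size_set(back,
			itype == IIO_BACKEND_INTERFACE_SERIAL_CMOS ? 16 : 20);
	if (ret)
		return ret;

	/* Start without oversampling; a real driver would take this from firmware or userspace. */
	return iio_backend_oversampling_ratio_set(back, 1);
}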
@@ -128,6 +137,11 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*interface_type_get)(struct iio_backend *back, + enum iio_backend_interface_type *type); + int (*data_size_set)(struct iio_backend *back, unsigned int size); + int (*oversampling_ratio_set)(struct iio_backend *back, + unsigned int ratio); int (*read_raw)(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); @@ -186,6 +200,11 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const char *buf, size_t len); ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); +int iio_backend_interface_type_get(struct iio_backend *back, + enum iio_backend_interface_type *type); +int iio_backend_data_size_set(struct iio_backend *back, unsigned int size); +int iio_backend_oversampling_ratio_set(struct iio_backend *back, + unsigned int ratio); int iio_backend_read_raw(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 81d9a19aeb91..37f27545f69f 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -11,8 +11,9 @@ struct iio_dev; struct device; +struct dma_chan; -void iio_dmaengine_buffer_free(struct iio_buffer *buffer); +void iio_dmaengine_buffer_teardown(struct iio_buffer *buffer); struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const char *channel, @@ -26,6 +27,10 @@ int devm_iio_dmaengine_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const char *channel, enum iio_buffer_direction dir); +int devm_iio_dmaengine_buffer_setup_with_handle(struct device *dev, + struct iio_dev *indio_dev, + struct dma_chan *chan, + enum iio_buffer_direction dir); #define devm_iio_dmaengine_buffer_setup(dev, indio_dev, channel) \ devm_iio_dmaengine_buffer_setup_ext(dev, indio_dev, channel, \ diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 418b1307d3f2..3b8d618bb3df 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -37,7 +37,7 @@ int iio_pop_from_buffer(struct iio_buffer *buffer, void *data); static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, void *data, int64_t timestamp) { - if (indio_dev->scan_timestamp) { + if (ACCESS_PRIVATE(indio_dev, scan_timestamp)) { size_t ts_offset = indio_dev->scan_bytes / sizeof(int64_t) - 1; ((int64_t *)data)[ts_offset] = timestamp; } diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 333d1d8ccb37..6a4479616479 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -418,7 +418,7 @@ unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan); * @chan: The channel being queried. * @attr: The ext_info attribute to read. * @buf: Where to store the attribute value. Assumed to hold - * at least PAGE_SIZE bytes. + * at least PAGE_SIZE bytes and to be aligned at PAGE_SIZE. * * Returns the number of bytes written to buf (perhaps w/o zero termination; * it need not even be a string), or an error code. @@ -445,7 +445,7 @@ ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr, * iio_read_channel_label() - read label for a given channel * @chan: The channel being queried. 
* @buf: Where to store the attribute value. Assumed to hold - * at least PAGE_SIZE bytes. + * at least PAGE_SIZE bytes and to be aligned at PAGE_SIZE. * * Returns the number of bytes written to buf, or an error code. */ diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h index 9cb6c80dea71..66f830ab9b49 100644 --- a/include/linux/iio/iio-gts-helper.h +++ b/include/linux/iio/iio-gts-helper.h @@ -188,6 +188,9 @@ int iio_gts_total_gain_to_scale(struct iio_gts *gts, int total_gain, int iio_gts_find_gain_sel_for_scale_using_time(struct iio_gts *gts, int time_sel, int scale_int, int scale_nano, int *gain_sel); +int iio_gts_find_gain_time_sel_for_scale(struct iio_gts *gts, int scale_int, + int scale_nano, int *gain_sel, + int *time_sel); int iio_gts_get_scale(struct iio_gts *gts, int gain, int time, int *scale_int, int *scale_nano); int iio_gts_find_new_gain_sel_by_old_gain_time(struct iio_gts *gts, @@ -196,11 +199,15 @@ int iio_gts_find_new_gain_sel_by_old_gain_time(struct iio_gts *gts, int iio_gts_find_new_gain_by_old_gain_time(struct iio_gts *gts, int old_gain, int old_time, int new_time, int *new_gain); +int iio_gts_find_new_gain_by_gain_time_min(struct iio_gts *gts, int old_gain, + int old_time, int new_time, + int *new_gain, bool *in_range); int iio_gts_avail_times(struct iio_gts *gts, const int **vals, int *type, int *length); int iio_gts_all_avail_scales(struct iio_gts *gts, const int **vals, int *type, int *length); int iio_gts_avail_scales_for_time(struct iio_gts *gts, int time, const int **vals, int *type, int *length); +int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time); #endif diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index a89e7e43e441..4247497f3f8b 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -28,7 +28,7 @@ * @groupcounter: index of next attribute group * @legacy_scan_el_group: attribute group for legacy scan elements attribute group * @legacy_buffer_group: attribute group for legacy buffer attributes group - * @bounce_buffer: for devices that call iio_push_to_buffers_with_timestamp_unaligned() + * @bounce_buffer: for devices that call iio_push_to_buffers_with_ts_unaligned() * @bounce_buffer_size: size of currently allocate bounce buffer * @scan_index_timestamp: cache of the index to the timestamp * @clock_id: timestamping clock posix identifier diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index ae65890d4567..07a0e8132e88 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,7 +9,7 @@ #include <linux/device.h> #include <linux/cdev.h> -#include <linux/cleanup.h> +#include <linux/compiler_types.h> #include <linux/slab.h> #include <linux/iio/types.h> /* IIO TODO LIST */ @@ -611,7 +611,7 @@ struct iio_dev { const unsigned long *available_scan_masks; unsigned int __private masklength; const unsigned long *active_scan_mask; - bool scan_timestamp; + bool __private scan_timestamp; struct iio_trigger *trig; struct iio_poll_func *pollfunc; struct iio_poll_func *pollfunc_event; @@ -663,30 +663,29 @@ int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); /* - * This autocleanup logic is normally used via - * iio_device_claim_direct_scoped(). + * Helper functions that allow claim and release of direct mode + * in a fashion that doesn't generate many false positives from sparse. 
+ * Note this must remain static inline in the header so that sparse + * can see the __acquire() marking. Revisit when sparse supports + * __cond_acquires() */ -DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), - iio_device_release_direct_mode(_T)) +static inline bool iio_device_claim_direct(struct iio_dev *indio_dev) +{ + int ret = iio_device_claim_direct_mode(indio_dev); -DEFINE_GUARD_COND(iio_claim_direct, _try, ({ - struct iio_dev *dev; - int d = iio_device_claim_direct_mode(_T); + if (ret) + return false; - if (d < 0) - dev = NULL; - else - dev = _T; - dev; - })) + __acquire(iio_dev); -/** - * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. - * @fail: What to do on failure to claim device. - * @iio_dev: Pointer to the IIO devices structure - */ -#define iio_device_claim_direct_scoped(fail, iio_dev) \ - scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) + return true; +} + +static inline void iio_device_release_direct(struct iio_dev *indio_dev) +{ + iio_device_release_direct_mode(indio_dev); + __release(indio_dev); +} int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index e6a75356567a..aa160511e265 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -44,6 +44,8 @@ struct adis_timeout { * @glob_cmd_reg: Register address of the GLOB_CMD register * @msc_ctrl_reg: Register address of the MSC_CTRL register * @diag_stat_reg: Register address of the DIAG_STAT register + * @diag_stat_size: Length (in bytes) of the DIAG_STAT register. If 0 the + * default length is 2 bytes long. * @prod_id_reg: Register address of the PROD_ID register * @prod_id: Product ID code that should be expected when reading @prod_id_reg * @self_test_mask: Bitmask of supported self-test operations @@ -70,6 +72,7 @@ struct adis_data { unsigned int glob_cmd_reg; unsigned int msc_ctrl_reg; unsigned int diag_stat_reg; + unsigned int diag_stat_size; unsigned int prod_id_reg; unsigned int prod_id; @@ -95,13 +98,28 @@ struct adis_data { }; /** + * struct adis_ops: Custom ops for adis devices. + * @write: Custom spi write implementation. + * @read: Custom spi read implementation. + * @reset: Custom sw reset implementation. The custom implementation does not + * need to sleep after the reset. It's done by the library already. + */ +struct adis_ops { + int (*write)(struct adis *adis, unsigned int reg, unsigned int value, + unsigned int size); + int (*read)(struct adis *adis, unsigned int reg, unsigned int *value, + unsigned int size); + int (*reset)(struct adis *adis); +}; + +/** * struct adis - ADIS device instance data * @spi: Reference to SPI device which owns this ADIS IIO device * @trig: IIO trigger object data * @data: ADIS chip variant specific data - * @burst: ADIS burst transfer information * @burst_extra_len: Burst extra length. Should only be used by devices that can * dynamically change their burst mode length. 
+ * @ops: ops struct for custom read and write functions * @state_lock: Lock used by the device to protect state * @msg: SPI message object * @xfer: SPI transfer objects to be used for a @msg @@ -117,6 +135,7 @@ struct adis { const struct adis_data *data; unsigned int burst_extra_len; + const struct adis_ops *ops; /** * The state_lock is meant to be used during operations that require * a sequence of SPI R/W in order to protect the SPI transfer @@ -169,7 +188,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, u8 val) { - return __adis_write_reg(adis, reg, val, 1); + return adis->ops->write(adis, reg, val, 1); } /** @@ -181,7 +200,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, u16 val) { - return __adis_write_reg(adis, reg, val, 2); + return adis->ops->write(adis, reg, val, 2); } /** @@ -193,7 +212,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, u32 val) { - return __adis_write_reg(adis, reg, val, 4); + return adis->ops->write(adis, reg, val, 4); } /** @@ -208,7 +227,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, unsigned int tmp; int ret; - ret = __adis_read_reg(adis, reg, &tmp, 2); + ret = adis->ops->read(adis, reg, &tmp, 2); if (ret == 0) *val = tmp; @@ -227,7 +246,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, unsigned int tmp; int ret; - ret = __adis_read_reg(adis, reg, &tmp, 4); + ret = adis->ops->read(adis, reg, &tmp, 4); if (ret == 0) *val = tmp; @@ -245,7 +264,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg, unsigned int val, unsigned int size) { guard(mutex)(&adis->state_lock); - return __adis_write_reg(adis, reg, val, size); + return adis->ops->write(adis, reg, val, size); } /** @@ -259,7 +278,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg, unsigned int *val, unsigned int size) { guard(mutex)(&adis->state_lock); - return __adis_read_reg(adis, reg, val, size); + return adis->ops->read(adis, reg, val, size); } /** diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h index 37572e4dc73a..1ee237b56183 100644 --- a/include/linux/iio/timer/stm32-timer-trigger.h +++ b/include/linux/iio/timer/stm32-timer-trigger.h @@ -72,6 +72,12 @@ #define TIM17_OC1 "tim17_oc1" +#define TIM20_OC1 "tim20_oc1" +#define TIM20_OC2 "tim20_oc2" +#define TIM20_OC3 "tim20_oc3" +#define TIM20_TRGO "tim20_trgo" +#define TIM20_TRGO2 "tim20_trgo2" + #if IS_REACHABLE(CONFIG_IIO_STM32_TIMER_TRIGGER) bool is_stm32_timer_trigger(struct iio_trigger *trig); #else diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index bc7babe91b2e..bf675a8aef8a 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -4,14 +4,14 @@ #ifdef CONFIG_NOINSTR_VALIDATION +#include <linux/objtool.h> #include <linux/stringify.h> /* Begin/end of an instrumentation safe region */ #define __instrumentation_begin(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_begin\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + ANNOTATE_INSTR_BEGIN(__ASM_BREF(c)) \ + : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -48,9 +48,8 @@ */ #define 
__instrumentation_end(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_end\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + ANNOTATE_INSTR_END(__ASM_BREF(c)) \ + : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else /* !CONFIG_NOINSTR_VALIDATION */ diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 771622650247..dfbf7d9d7bb5 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -100,7 +100,6 @@ void ishtp_cl_destroy_connection(struct ishtp_cl *cl, bool reset); int ishtp_cl_send(struct ishtp_cl *cl, uint8_t *buf, size_t length); int ishtp_cl_flush_queues(struct ishtp_cl *cl); int ishtp_cl_io_rb_recycle(struct ishtp_cl_rb *rb); -bool ishtp_cl_tx_empty(struct ishtp_cl *cl); struct ishtp_cl_rb *ishtp_cl_rx_get_rb(struct ishtp_cl *cl); void *ishtp_get_client_data(struct ishtp_cl *cl); void ishtp_set_client_data(struct ishtp_cl *cl, void *data); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8cd9327e4e78..c782a74d2a30 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -448,7 +448,7 @@ irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, static inline void disable_irq_nosync_lockdep(unsigned int irq) { disable_irq_nosync(irq); -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_disable(); #endif } @@ -456,22 +456,14 @@ static inline void disable_irq_nosync_lockdep(unsigned int irq) static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) { disable_irq_nosync(irq); -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_save(*flags); #endif } -static inline void disable_irq_lockdep(unsigned int irq) -{ - disable_irq(irq); -#ifdef CONFIG_LOCKDEP - local_irq_disable(); -#endif -} - static inline void enable_irq_lockdep(unsigned int irq) { -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_enable(); #endif enable_irq(irq); @@ -479,7 +471,7 @@ static inline void enable_irq_lockdep(unsigned int irq) static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) { -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_restore(*flags); #endif enable_irq(irq); diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index aaa8a0767aa3..1b400f26f63d 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ if (ITSTART(node) <= last) { /* Cond1 */ \ if (start <= ITLAST(node)) /* Cond2 */ \ return node; /* node is leftmost match */ \ - if (node->ITRB.rb_right) { \ - node = rb_entry(node->ITRB.rb_right, \ - ITSTRUCT, ITRB); \ - if (start <= node->ITSUBTREE) \ - continue; \ - } \ + node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \ + continue; \ } \ return NULL; /* No match */ \ } \ diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index f32522bb3aa5..d3eade7cf663 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -101,22 +101,38 @@ static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) #ifndef ioread64 #define ioread64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && 
defined(CONFIG_64BIT) +#define ioread64 __ioread64_hi_lo +#else #define ioread64 ioread64_hi_lo #endif +#endif #ifndef iowrite64 #define iowrite64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64 __iowrite64_hi_lo +#else #define iowrite64 iowrite64_hi_lo #endif +#endif #ifndef ioread64be #define ioread64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64be __ioread64be_hi_lo +#else #define ioread64be ioread64be_hi_lo #endif +#endif #ifndef iowrite64be #define iowrite64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64be __iowrite64be_hi_lo +#else #define iowrite64be iowrite64be_hi_lo #endif +#endif #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index 448a21435dba..94e676ec3d3f 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -101,22 +101,38 @@ static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) #ifndef ioread64 #define ioread64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64 __ioread64_lo_hi +#else #define ioread64 ioread64_lo_hi #endif +#endif #ifndef iowrite64 #define iowrite64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64 __iowrite64_lo_hi +#else #define iowrite64 iowrite64_lo_hi #endif +#endif #ifndef ioread64be #define ioread64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64be __ioread64be_lo_hi +#else #define ioread64be ioread64be_lo_hi #endif +#endif #ifndef iowrite64be #define iowrite64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64be __iowrite64be_lo_hi +#else #define iowrite64be iowrite64be_lo_hi #endif +#endif #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ce86b09ae80f..bba2a51c87d2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -181,11 +181,21 @@ struct io_pgtable_cfg { }; /** + * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk + * + * @ptes: The recorded PTE values from the walk + */ +struct arm_lpae_io_pgtable_walk_data { + u64 ptes[4]; +}; + +/** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. + * @pgtable_walk: (optional) Perform a page table walk for a given iova. * * These functions map directly onto the iommu_ops member functions with * the same names. 
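The optional ->pgtable_walk() hook takes an opaque walk-data pointer; for the ARM LPAE format, the struct arm_lpae_io_pgtable_walk_data added above receives one recorded PTE per level. A hedged sketch of how a fault path could use it for diagnostics; the helper name and the pr_err() reporting are illustrative only:

#include <linux/kernel.h>
#include <linux/io-pgtable.h>

static void example_dump_fault_walk(struct io_pgtable_ops *ops,
				    unsigned long iova)
{
	struct arm_lpae_io_pgtable_walk_data wd = {};
	int i;

	/* The hook is optional; bail out quietly if the format lacks it or the walk fails. */
	if (!ops->pgtable_walk || ops->pgtable_walk(ops, iova, &wd))
		return;

	for (i = 0; i < ARRAY_SIZE(wd.ptes); i++)
		pr_err("iova %#lx: pte[%d] = %#018llx\n", iova, i, wd.ptes[i]);
}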
@@ -199,6 +209,7 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd); int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags, diff --git a/include/linux/io.h b/include/linux/io.h index 59ec5eea696c..6a6bc4d46d0a 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -9,13 +9,10 @@ #include <linux/sizes.h> #include <linux/types.h> #include <linux/init.h> -#include <linux/bug.h> -#include <linux/err.h> #include <asm/io.h> #include <asm/page.h> struct device; -struct resource; #ifndef __iowrite32_copy void __iowrite32_copy(void __iomem *to, const void *from, size_t count); @@ -65,8 +62,6 @@ static inline void devm_ioport_unmap(struct device *dev, void __iomem *addr) } #endif -#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) - void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index a3ce553413de..0634a3de1782 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -4,6 +4,7 @@ #include <uapi/linux/io_uring.h> #include <linux/io_uring_types.h> +#include <linux/blk-mq.h> /* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ #define IORING_URING_CMD_CANCELABLE (1U << 30) @@ -19,7 +20,6 @@ struct io_uring_cmd { }; struct io_uring_cmd_data { - struct io_uring_sqe sqes[2]; void *op_data; }; @@ -39,7 +39,14 @@ static inline void io_uring_cmd_private_sz_check(size_t cmd_sz) #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd); + struct iov_iter *iter, + struct io_uring_cmd *ioucmd, + unsigned int issue_flags); +int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, + const struct iovec __user *uvec, + size_t uvec_segs, + int ddir, struct iov_iter *iter, + unsigned issue_flags); /* * Completes the request, i.e. 
posts an io_uring CQE and deallocates @ioucmd @@ -66,8 +73,18 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); #else -static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd) +static inline int +io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, struct io_uring_cmd *ioucmd, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, + const struct iovec __user *uvec, + size_t uvec_segs, + int ddir, struct iov_iter *iter, + unsigned issue_flags) { return -EOPNOTSUPP; } @@ -123,4 +140,10 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur return cmd_to_io_kiocb(cmd)->async_data; } +int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, + void (*release)(void *), unsigned int index, + unsigned int issue_flags); +int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index, + unsigned int issue_flags); + #endif /* _LINUX_IO_URING_CMD_H */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fd4cdb0860a2..b44d201520d8 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -40,6 +40,8 @@ enum io_uring_cmd_flags { IO_URING_F_TASK_DEAD = (1 << 13), }; +struct io_zcrx_ifq; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -78,8 +80,9 @@ struct io_hash_table { struct io_mapped_region { struct page **pages; - void *vmap_ptr; - size_t nr_pages; + void *ptr; + unsigned nr_pages; + unsigned flags; }; /* @@ -107,6 +110,14 @@ struct io_uring_task { } ____cacheline_aligned_in_smp; }; +struct iou_vec { + union { + struct iovec *iovec; + struct bio_vec *bvec; + }; + unsigned nr; /* number of struct iovec it can hold */ +}; + struct io_uring { u32 head; u32 tail; @@ -221,7 +232,8 @@ struct io_alloc_cache { void **entries; unsigned int nr_cached; unsigned int max_cached; - size_t elem_size; + unsigned int elem_size; + unsigned int init_clear; }; struct io_ring_ctx { @@ -290,16 +302,23 @@ struct io_ring_ctx { struct io_file_table file_table; struct io_rsrc_data buf_table; + struct io_alloc_cache node_cache; + struct io_alloc_cache imu_cache; struct io_submit_state submit_state; + /* + * Modifications are protected by ->uring_lock and ->mmap_lock. + * The flags, buf_pages and buf_nr_pages fields should be stable + * once published. + */ struct xarray io_bl_xa; struct io_hash_table cancel_table; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; struct io_alloc_cache rw_cache; - struct io_alloc_cache uring_cache; + struct io_alloc_cache cmd_cache; /* * Any cancelable uring_cmd is added to this list in @@ -353,7 +372,6 @@ struct io_ring_ctx { spinlock_t completion_lock; - struct list_head io_buffers_comp; struct list_head cq_overflow_list; struct hlist_head waitid_list; @@ -372,12 +390,12 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; - struct list_head io_buffers_cache; - /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; + struct io_zcrx_ifq *ifq; + u32 pers_next; struct xarray personalities; @@ -424,23 +442,25 @@ struct io_ring_ctx { * side will need to grab this lock, to prevent either side from * being run concurrently with the other. 
*/ - struct mutex resize_lock; - - /* - * If IORING_SETUP_NO_MMAP is used, then the below holds - * the gup'ed pages for the two rings, and the sqes. - */ - unsigned short n_ring_pages; - unsigned short n_sqe_pages; - struct page **ring_pages; - struct page **sqe_pages; + struct mutex mmap_lock; + struct io_mapped_region sq_region; + struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; + /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */ + struct io_mapped_region zcrx_region; }; +/* + * Token indicating function is called in task work context: + * ctx->uring_lock is held and any completions generated will be flushed. + * ONLY core io_uring.c should instantiate this struct. + */ struct io_tw_state { }; +/* Alias to use in code that doesn't instantiate struct io_tw_state */ +typedef struct io_tw_state io_tw_token_t; enum { REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, @@ -470,6 +490,7 @@ enum { REQ_F_SKIP_LINK_CQES_BIT, REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, + REQ_F_MULTISHOT_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, /* keep async read/write and isreg together and in order */ @@ -481,6 +502,8 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_HAS_METADATA_BIT, + REQ_F_IMPORT_BUFFER_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -545,6 +568,8 @@ enum { REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), /* double poll may active */ REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), + /* request posts multiple completions, should be set at prep time */ + REQ_F_MULTISHOT = IO_REQ_FLAG(REQ_F_MULTISHOT_BIT), /* fast poll multishot mode */ REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ @@ -561,9 +586,16 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* request has read/write metadata assigned */ + REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), + /* + * For vectored fixed buffers, resolve iovec to registered buffers. + * For SEND_ZC, whether to import buffers (i.e. the first issue). + */ + REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT), }; -typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw); struct io_task_work { struct llist_node node; @@ -598,7 +630,11 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz) io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \ ((cmd_type *)&(req)->cmd) \ ) -#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) + +static inline struct io_kiocb *cmd_to_io_kiocb(void *ptr) +{ + return ptr; +} struct io_kiocb { union { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 75bf54e76f3b..522644d62f30 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -56,6 +56,13 @@ struct vm_fault; * * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must * never be merged with the mapping before it. + * + * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block + * assigned to it yet and the file system will do that in the bio submission + * handler, splitting the I/O as needed. + * + * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic + * bio, i.e. set REQ_ATOMIC. 
*/ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -68,6 +75,13 @@ struct vm_fault; #endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) #define IOMAP_F_BOUNDARY (1U << 6) +#define IOMAP_F_ANON_WRITE (1U << 7) +#define IOMAP_F_ATOMIC_BIO (1U << 8) + +/* + * Flag reserved for file system specific usage + */ +#define IOMAP_F_PRIVATE (1U << 12) /* * Flags set by the core iomap code during operations: @@ -79,14 +93,8 @@ struct vm_fault; * range it covers needs to be remapped by the high level before the operation * can proceed. */ -#define IOMAP_F_SIZE_CHANGED (1U << 8) -#define IOMAP_F_STALE (1U << 9) - -/* - * Flags from 0x1000 up are for file system specific usage: - */ -#define IOMAP_F_PRIVATE (1U << 12) - +#define IOMAP_F_SIZE_CHANGED (1U << 14) +#define IOMAP_F_STALE (1U << 15) /* * Magic value for addr: @@ -111,6 +119,8 @@ struct iomap { static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) { + if (iomap->flags & IOMAP_F_ANON_WRITE) + return U64_MAX; /* invalid */ return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; } @@ -182,7 +192,8 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ -#define IOMAP_ATOMIC (1 << 9) +#define IOMAP_ATOMIC (1 << 9) /* torn-write protection */ +#define IOMAP_DONTCACHE (1 << 10) struct iomap_ops { /* @@ -211,8 +222,10 @@ struct iomap_ops { * calls to iomap_iter(). Treat as read-only in the body. * @len: The remaining length of the file segment we're operating on. * It is updated at the same time as @pos. - * @processed: The number of bytes processed by the body in the most recent - * iteration, or a negative errno. 0 causes the iteration to stop. + * @iter_start_pos: The original start pos for the current iomap. Used for + * incremental iter advance. + * @status: Status of the most recent iteration. Zero on success or a negative + * errno on error. * @flags: Zero or more of the iomap_begin flags above. * @iomap: Map describing the I/O iteration * @srcmap: Source map for COW operations @@ -221,7 +234,8 @@ struct iomap_iter { struct inode *inode; loff_t pos; u64 len; - s64 processed; + loff_t iter_start_pos; + int status; unsigned flags; struct iomap iomap; struct iomap srcmap; @@ -229,20 +243,46 @@ struct iomap_iter { }; int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops); +int iomap_iter_advance(struct iomap_iter *iter, u64 *count); /** - * iomap_length - length of the current iomap iteration + * iomap_length_trim - trimmed length of the current iomap iteration * @iter: iteration structure + * @pos: File position to trim from. + * @len: Length of the mapping to trim to. * - * Returns the length that the operation applies to for the current iteration. + * Returns a trimmed length that the operation applies to for the current + * iteration. */ -static inline u64 iomap_length(const struct iomap_iter *iter) +static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos, + u64 len) { u64 end = iter->iomap.offset + iter->iomap.length; if (iter->srcmap.type != IOMAP_HOLE) end = min(end, iter->srcmap.offset + iter->srcmap.length); - return min(iter->len, end - iter->pos); + return min(len, end - pos); +} + +/** + * iomap_length - length of the current iomap iteration + * @iter: iteration structure + * + * Returns the length that the operation applies to for the current iteration. 
+ */ +static inline u64 iomap_length(const struct iomap_iter *iter) +{ + return iomap_length_trim(iter, iter->pos, iter->len); +} + +/** + * iomap_iter_advance_full - advance by the full length of current map + */ +static inline int iomap_iter_advance_full(struct iomap_iter *iter) +{ + u64 length = iomap_length(iter); + + return iomap_iter_advance(iter, &length); } /** @@ -306,12 +346,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, - bool *did_zero, const struct iomap_ops *ops); + bool *did_zero, const struct iomap_ops *ops, void *private); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - const struct iomap_ops *ops); -vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, - const struct iomap_ops *ops); - + const struct iomap_ops *ops, void *private); +vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, + void *private); typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, struct iomap *iomap); void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, @@ -328,16 +367,45 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops); /* + * Flags for iomap_ioend->io_flags. + */ +/* shared COW extent */ +#define IOMAP_IOEND_SHARED (1U << 0) +/* unwritten extent */ +#define IOMAP_IOEND_UNWRITTEN (1U << 1) +/* don't merge into previous ioend */ +#define IOMAP_IOEND_BOUNDARY (1U << 2) +/* is direct I/O */ +#define IOMAP_IOEND_DIRECT (1U << 3) +/* is DONTCACHE I/O */ +#define IOMAP_IOEND_DONTCACHE (1U << 4) + +/* + * Flags that if set on either ioend prevent the merge of two ioends. + * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) + */ +#define IOMAP_IOEND_NOMERGE_FLAGS \ + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \ + IOMAP_IOEND_DONTCACHE) + +/* * Structure for writeback I/O completions. + * + * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io + * for direct I/O) can split a bio generated by iomap. In that case the parent + * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ - u16 io_type; - u16 io_flags; /* IOMAP_F_* */ + u16 io_flags; /* IOMAP_IOEND_* */ struct inode *io_inode; /* file being written to */ - size_t io_size; /* size of data within eof */ + size_t io_size; /* size of the extent */ + atomic_t io_remaining; /* completetion defer count */ + int io_error; /* stashed away status */ + struct iomap_ioend *io_parent; /* parent for completions */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ + void *io_private; /* file system private data */ struct bio io_bio; /* MUST BE LAST! */ }; @@ -362,12 +430,14 @@ struct iomap_writeback_ops { loff_t offset, unsigned len); /* - * Optional, allows the file systems to perform actions just before - * submitting the bio and/or override the bio end_io handler for complex - * operations like copy on write extent manipulation or unwritten extent - * conversions. + * Optional, allows the file systems to hook into bio submission, + * including overriding the bi_end_io handler. 
+ * + * Returns 0 if the bio was successfully submitted, or a negative + * error code if status was non-zero or another error happened and + * the bio could not be submitted. */ - int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); /* * Optional, allows the file system to discard state on a page where @@ -383,6 +453,10 @@ struct iomap_writepage_ctx { u32 nr_folios; /* folios added to the ioend */ }; +struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, + loff_t file_offset, u16 ioend_flags); +struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, + unsigned int max_len, bool is_append); void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); @@ -454,4 +528,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, # define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO) #endif /* CONFIG_SWAP */ +extern struct bio_set iomap_ioend_bioset; + #endif /* LINUX_IOMAP_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 318d27841130..4273871845ee 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,9 +41,12 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; +struct iommu_dma_msi_cookie; struct iommu_fault_param; struct iommufd_ctx; struct iommufd_viommu; +struct msi_desc; +struct msi_msg; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -163,6 +166,15 @@ struct iommu_domain_geometry { bool force_aperture; /* DMA only allowed in mappable range? */ }; +enum iommu_domain_cookie_type { + IOMMU_COOKIE_NONE, + IOMMU_COOKIE_DMA_IOVA, + IOMMU_COOKIE_DMA_MSI, + IOMMU_COOKIE_FAULT_HANDLER, + IOMMU_COOKIE_SVA, + IOMMU_COOKIE_IOMMUFD, +}; + /* Domain feature flags */ #define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */ #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API @@ -209,15 +221,18 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; + enum iommu_domain_cookie_type cookie_type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; - struct iommu_dma_cookie *iova_cookie; int (*iopf_handler)(struct iopf_group *group); - void *fault_data; - union { + + union { /* cookie */ + struct iommu_dma_cookie *iova_cookie; + struct iommu_dma_msi_cookie *msi_cookie; + struct iommufd_hw_pagetable *iommufd_hwpt; struct { iommu_fault_handler_t handler; void *handler_token; @@ -425,10 +440,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -441,8 +456,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. 
Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ @@ -587,9 +602,6 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains - * @remove_dev_pasid: Remove any translation configurations of a specific - * pasid, so that any DMA transactions with this pasid - * will be blocked by the hardware. * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind * the @dev, as the set of virtualization resources shared/passed * to user space IOMMU instance. And associate it with a nesting @@ -647,8 +659,6 @@ struct iommu_ops { struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); - void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain); struct iommufd_viommu *(*viommu_alloc)( struct device *dev, struct iommu_domain *parent_domain, @@ -740,6 +750,7 @@ struct iommu_domain_ops { * @dev: struct device for sysfs handling * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs + * @ready: set once iommu_device_register() has completed successfully */ struct iommu_device { struct list_head list; @@ -748,6 +759,7 @@ struct iommu_device { struct device *dev; struct iommu_group *singleton_group; u32 max_pasids; + bool ready; }; /** @@ -1084,7 +1096,6 @@ struct iommu_mm_data { }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); -void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) @@ -1395,10 +1406,6 @@ static inline int iommu_fwspec_init(struct device *dev, return -ENODEV; } -static inline void iommu_fwspec_free(struct device *dev) -{ -} - static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) { @@ -1475,6 +1482,18 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#ifdef CONFIG_IRQ_MSI_IOMMU +#ifdef CONFIG_IOMMU_API +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); +#else +static inline int iommu_dma_prepare_msi(struct msi_desc *desc, + phys_addr_t msi_addr) +{ + return 0; +} +#endif /* CONFIG_IOMMU_API */ +#endif /* CONFIG_IRQ_MSI_IOMMU */ + #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) void iommu_group_mutex_assert(struct device *dev); #else @@ -1508,32 +1527,12 @@ static inline void iommu_debugfs_setup(void) {} #endif #ifdef CONFIG_IOMMU_DMA -#include <linux/msi.h> - int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); - -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); - #else /* CONFIG_IOMMU_DMA */ - -struct msi_desc; -struct msi_msg; - static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { return -ENODEV; } - -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) -{ - return 0; -} - -static inline void 
iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ -} - #endif /* CONFIG_IOMMU_DMA */ /* diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 11110c749200..34b6e6ca4bfa 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,9 +8,11 @@ #include <linux/err.h> #include <linux/errno.h> +#include <linux/iommu.h> #include <linux/refcount.h> #include <linux/types.h> #include <linux/xarray.h> +#include <uapi/linux/iommufd.h> struct device; struct file; @@ -34,6 +36,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, + IOMMUFD_OBJ_VEVENTQ, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -52,9 +55,11 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); -void iommufd_device_detach(struct iommufd_device *idev); +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); u32 iommufd_device_to_id(struct iommufd_device *idev); @@ -93,6 +98,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; struct xarray vdevs; + struct list_head veventqs; + struct rw_semaphore veventqs_rwsem; unsigned int type; }; @@ -187,6 +194,11 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id); +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -200,6 +212,20 @@ iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { return NULL; } + +static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, + unsigned long *vdev_id) +{ + return -ENOENT; +} + +static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, + void *event_data, size_t data_len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5385349f0b8a..e8b2d6aa4013 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -154,15 +154,20 @@ enum { }; /* helpers to define resources */ -#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ +#define DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, _desc) \ (struct resource) { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ - .desc = IORES_DESC_NONE, \ + .desc = (_desc), \ } +#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ + DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, IORES_DESC_NONE) +#define DEFINE_RES(_start, _size, _flags) \ + DEFINE_RES_NAMED(_start, _size, NULL, _flags) + #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), 
(_name), IORESOURCE_IO) #define DEFINE_RES_IO(_start, _size) \ diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index a1c9c0d48ebf..2f74dd90c271 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -126,7 +126,7 @@ int ipmi_create_user(unsigned int if_num, * the users before you destroy the callback structures, it should be * safe, too. */ -int ipmi_destroy_user(struct ipmi_user *user); +void ipmi_destroy_user(struct ipmi_user *user); /* Get the IPMI version of the BMC we are talking to. */ int ipmi_get_version(struct ipmi_user *user, diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a6e2aadbb91b..5aeeed22f35b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -207,6 +207,7 @@ struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; + u8 dontfrag:1; }; /* struct ipv6_pinfo - ipv6 private area */ diff --git a/include/linux/irq.h b/include/linux/irq.h index fa711f80957b..dd5df1e2d032 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -64,7 +64,6 @@ enum irqchip_irq_state; * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) - * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context * IRQ_NESTED_THREAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable * IRQ_IS_POLLED - Always polled by another interrupt. Exclude @@ -93,7 +92,6 @@ enum { IRQ_NOREQUEST = (1 << 11), IRQ_NOAUTOEN = (1 << 12), IRQ_NO_BALANCING = (1 << 13), - IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), @@ -105,7 +103,7 @@ enum { #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ - IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ + IRQ_NOAUTOEN | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN) @@ -201,8 +199,6 @@ struct irq_data { * IRQD_LEVEL - Interrupt is level triggered * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup * from suspend - * IRQD_MOVE_PCNTXT - Interrupt can be moved in process - * context * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt @@ -233,7 +229,6 @@ enum { IRQD_AFFINITY_SET = BIT(12), IRQD_LEVEL = BIT(13), IRQD_WAKEUP_STATE = BIT(14), - IRQD_MOVE_PCNTXT = BIT(15), IRQD_IRQ_DISABLED = BIT(16), IRQD_IRQ_MASKED = BIT(17), IRQD_IRQ_INPROGRESS = BIT(18), @@ -338,11 +333,6 @@ static inline bool irqd_is_wakeup_set(struct irq_data *d) return __irqd_to_state(d) & IRQD_WAKEUP_STATE; } -static inline bool irqd_can_move_in_process_context(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; -} - static inline bool irqd_irq_disabled(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_DISABLED; @@ -496,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @ipi_send_mask: send an IPI to destination cpus in cpumask * @irq_nmi_setup: function called from core code before enabling an NMI * @irq_nmi_teardown: function called from core code after disabling an NMI + * @irq_force_complete_move: optional function to force complete pending irq move * @flags: chip specific flags */ struct irq_chip { @@ -547,6 +538,8 @@ struct irq_chip { int (*irq_nmi_setup)(struct irq_data *data); void (*irq_nmi_teardown)(struct irq_data *data); + void 
(*irq_force_complete_move)(struct irq_data *data); + unsigned long flags; }; @@ -567,6 +560,7 @@ struct irq_chip { * in the suspend path if they are in disabled state * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup * IRQCHIP_IMMUTABLE: Don't ever change anything in this chip + * IRQCHIP_MOVE_DEFERRED: Move the interrupt in actual interrupt context */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -581,6 +575,7 @@ enum { IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), IRQCHIP_IMMUTABLE = (1 << 11), + IRQCHIP_MOVE_DEFERRED = (1 << 12), }; #include <linux/irqdesc.h> @@ -620,6 +615,7 @@ extern int irq_affinity_online_cpu(unsigned int cpu); #endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) +bool irq_can_move_in_process_context(struct irq_data *data); void __irq_move_irq(struct irq_data *data); static inline void irq_move_irq(struct irq_data *data) { @@ -627,11 +623,10 @@ static inline void irq_move_irq(struct irq_data *data) __irq_move_irq(data); } void irq_move_masked_irq(struct irq_data *data); -void irq_force_complete_move(struct irq_desc *desc); #else +static inline bool irq_can_move_in_process_context(struct irq_data *data) { return true; } static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } -static inline void irq_force_complete_move(struct irq_desc *desc) { } #endif extern int no_irq_affinity; @@ -694,6 +689,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); #endif +/* Disable or mask interrupts during a kernel kexec */ +extern void machine_kexec_mask_interrupts(void); + /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h deleted file mode 100644 index ea4e087fac98..000000000000 --- a/include/linux/irqchip/irq-davinci-aintc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_AINTC_ -#define _LINUX_IRQ_DAVINCI_AINTC_ - -#include <linux/ioport.h> - -/** - * struct davinci_aintc_config - configuration data for davinci-aintc driver. - * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - * @prios: an array of size num_irqs containing priority settings for - * each interrupt - */ -struct davinci_aintc_config { - struct resource reg; - unsigned int num_irqs; - u8 *prios; -}; - -void davinci_aintc_init(const struct davinci_aintc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */ diff --git a/include/linux/irqchip/irq-davinci-cp-intc.h b/include/linux/irqchip/irq-davinci-cp-intc.h deleted file mode 100644 index 8d71ed5b5a61..000000000000 --- a/include/linux/irqchip/irq-davinci-cp-intc.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_CP_INTC_ -#define _LINUX_IRQ_DAVINCI_CP_INTC_ - -#include <linux/ioport.h> - -/** - * struct davinci_cp_intc_config - configuration data for davinci-cp-intc - * driver. 
- * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - */ -struct davinci_cp_intc_config { - struct resource reg; - unsigned int num_irqs; -}; - -int davinci_cp_intc_init(const struct davinci_cp_intc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_CP_INTC_ */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e432b6a12a32..bb7111105296 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -72,7 +72,7 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, /** * struct irq_domain_ops - Methods for irq_domain objects - * @match: Match an interrupt controller device node to a host, returns + * @match: Match an interrupt controller device node to a domain, returns * 1 on a match * @select: Match an interrupt controller fw specification. It is more generic * than @match as it receives a complete struct irq_fwspec. Therefore, @@ -350,13 +350,13 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token); -extern void irq_set_default_host(struct irq_domain *host); -extern struct irq_domain *irq_get_default_host(void); -extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node, - const struct irq_affinity_desc *affinity); +struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); +void irq_set_default_domain(struct irq_domain *domain); +struct irq_domain *irq_get_default_domain(void); +int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, + irq_hw_number_t hwirq, int node, + const struct irq_affinity_desc *affinity); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { @@ -370,8 +370,8 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) return fwnode && fwnode->ops == &irqchip_fwnode_ops; } -extern void irq_domain_update_bus_token(struct irq_domain *domain, - enum irq_domain_bus_token bus_token); +void irq_domain_update_bus_token(struct irq_domain *domain, + enum irq_domain_bus_token bus_token); static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, @@ -454,7 +454,7 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod return IS_ERR(d) ? NULL : d; } -extern unsigned int irq_create_direct_mapping(struct irq_domain *host); +unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, @@ -507,29 +507,29 @@ static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fw return IS_ERR(d) ? 
NULL : d; } -extern void irq_domain_remove(struct irq_domain *host); +void irq_domain_remove(struct irq_domain *domain); -extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, - irq_hw_number_t hwirq); -extern void irq_domain_associate_many(struct irq_domain *domain, - unsigned int irq_base, - irq_hw_number_t hwirq_base, int count); +int irq_domain_associate(struct irq_domain *domain, unsigned int irq, + irq_hw_number_t hwirq); +void irq_domain_associate_many(struct irq_domain *domain, + unsigned int irq_base, + irq_hw_number_t hwirq_base, int count); -extern unsigned int irq_create_mapping_affinity(struct irq_domain *host, - irq_hw_number_t hwirq, - const struct irq_affinity_desc *affinity); -extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); -extern void irq_dispose_mapping(unsigned int virq); +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity); +unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); +void irq_dispose_mapping(unsigned int virq); -static inline unsigned int irq_create_mapping(struct irq_domain *host, +static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - return irq_create_mapping_affinity(host, hwirq, NULL); + return irq_create_mapping_affinity(domain, hwirq, NULL); } -extern struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, - irq_hw_number_t hwirq, - unsigned int *irq); +struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq, + unsigned int *irq); static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) @@ -587,19 +587,21 @@ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. 
*/ -extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, - unsigned int virq); -extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, - void *chip_data, irq_flow_handler_t handler, - void *handler_data, const char *handler_name); -extern void irq_domain_reset_irq_data(struct irq_data *irq_data); +struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, + unsigned int virq); +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, + const struct irq_chip *chip, + void *chip_data, irq_flow_handler_t handler, + void *handler_data, const char *handler_name); +void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, - unsigned int flags, unsigned int size, - struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, + unsigned int flags, + unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data); static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, unsigned int flags, @@ -613,13 +615,13 @@ static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *par ops, host_data); } -extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, - unsigned int nr_irqs, int node, void *arg, - bool realloc, - const struct irq_affinity_desc *affinity); -extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); -extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early); -extern void irq_domain_deactivate_irq(struct irq_data *irq_data); +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + unsigned int nr_irqs, int node, void *arg, + bool realloc, + const struct irq_affinity_desc *affinity); +void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); +int irq_domain_activate_irq(struct irq_data *irq_data, bool early); +void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) @@ -628,32 +630,29 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, NULL); } -extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs, void *arg); -extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, - unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, - void *chip_data); -extern void irq_domain_free_irqs_common(struct irq_domain *domain, - unsigned int virq, - unsigned int nr_irqs); -extern void irq_domain_free_irqs_top(struct irq_domain *domain, - unsigned int virq, unsigned int nr_irqs); - -extern int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); -extern int irq_domain_pop_irq(struct irq_domain *domain, int virq); - -extern int irq_domain_alloc_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs, void *arg); - -extern void irq_domain_free_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs); - -extern int irq_domain_disconnect_hierarchy(struct irq_domain *domain, +int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, + unsigned int virq, + irq_hw_number_t hwirq, + const struct 
irq_chip *chip, + void *chip_data); +void irq_domain_free_irqs_common(struct irq_domain *domain, + unsigned int virq, + unsigned int nr_irqs); +void irq_domain_free_irqs_top(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs); + +int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); +int irq_domain_pop_irq(struct irq_domain *domain, int virq); + +int irq_domain_alloc_irqs_parent(struct irq_domain *domain, + unsigned int irq_base, + unsigned int nr_irqs, void *arg); + +void irq_domain_free_irqs_parent(struct irq_domain *domain, + unsigned int irq_base, + unsigned int nr_irqs); + +int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 50f7ea8714bf..023e8abdb99a 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -28,7 +28,7 @@ #include <linux/slab.h> #include <linux/bit_spinlock.h> #include <linux/blkdev.h> -#include <crypto/hash.h> +#include <linux/crc32c.h> #endif #define journal_oom_retry 1 @@ -459,7 +459,6 @@ struct jbd2_revoke_table_s; * @h_ref: Reference count on this handle. * @h_err: Field for caller's use to track errors through large fs operations. * @h_sync: Flag for sync-on-close. - * @h_jdata: Flag to force data journaling. * @h_reserved: Flag for handle for reserved credits. * @h_aborted: Flag indicating fatal error on handle. * @h_type: For handle statistics. @@ -491,7 +490,6 @@ struct jbd2_journal_handle /* Flags [no locking] */ unsigned int h_sync: 1; - unsigned int h_jdata: 1; unsigned int h_reserved: 1; unsigned int h_aborted: 1; unsigned int h_type: 8; @@ -700,12 +698,6 @@ struct transaction_s /* Disk flush needs to be sent to fs partition [no locking] */ int t_need_data_flush; - - /* - * For use by the filesystem to store fs-specific data - * structures associated with the transaction - */ - struct list_head t_private_list; }; struct transaction_run_stats_s { @@ -1242,13 +1234,6 @@ struct journal_s void *j_private; /** - * @j_chksum_driver: - * - * Reference to checksum algorithm driver via cryptoapi. - */ - struct crypto_shash *j_chksum_driver; - - /** * @j_csum_seed: * * Precomputed journal UUID checksum for seeding other checksums. 
@@ -1395,9 +1380,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) #define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ #define JBD2_BARRIER 0x020 /* Use IDE barriers */ -#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file - * data write error in ordered - * mode */ #define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on * clean and empty filesystem * logging area */ @@ -1414,7 +1396,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) */ /* Filing buffers */ -extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); @@ -1634,10 +1615,12 @@ extern void jbd2_journal_destroy_revoke_record_cache(void); extern void jbd2_journal_destroy_revoke_table_cache(void); extern int __init jbd2_journal_init_revoke_record_cache(void); extern int __init jbd2_journal_init_revoke_table_cache(void); +struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size); +void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); -extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); +extern void jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); extern void jbd2_journal_write_revoke_records(transaction_t *transaction, struct list_head *log_bufs); @@ -1743,17 +1726,10 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); -static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) -{ - return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); -} - static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && - journal->j_chksum_driver == NULL); - - return journal->j_chksum_driver != NULL; + return jbd2_has_feature_csum2(journal) || + jbd2_has_feature_csum3(journal); } static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) @@ -1790,27 +1766,10 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -/* JBD uses a CRC32 checksum */ -#define JBD_MAX_CHECKSUM_SIZE 4 - static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { - DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx, - DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE, - sizeof(*((struct shash_desc *)0)->__ctx))); - int err; - - BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > - JBD_MAX_CHECKSUM_SIZE); - - desc->tfm = journal->j_chksum_driver; - *(u32 *)desc->__ctx = crc; - - err = crypto_shash_update(desc, address, length); - BUG_ON(err); - - return *(u32 *)desc->__ctx; + return crc32c(crc, address, length); } /* Return most recent uncommitted transaction */ diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f5a2727ca4a9..fdb79dd1ebd8 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -75,6 +75,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <linux/cleanup.h> extern bool 
static_key_initialized; @@ -347,6 +348,8 @@ static inline void static_key_disable(struct static_key *key) #endif /* CONFIG_JUMP_LABEL */ +DEFINE_LOCK_GUARD_0(jump_label_lock, jump_label_lock(), jump_label_unlock()) + #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 1c6a6c1704d8..d5dd54c53ace 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -55,12 +55,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) if (is_ksym_addr((unsigned long)ptr)) return ptr; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)ptr); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); - preempt_enable(); #endif return ptr; } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 6bbfc8aa42e8..890011071f2b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -153,7 +153,7 @@ static __always_inline void kasan_unpoison_new_object(struct kmem_cache *cache, void __kasan_poison_new_object(struct kmem_cache *cache, void *object); /** - * kasan_unpoison_new_object - Repoison a new slab object. + * kasan_poison_new_object - Repoison a new slab object. * @cache: Cache the object belong to. * @object: Pointer to the object. * @@ -491,7 +491,6 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); -void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ @@ -509,7 +508,6 @@ static inline void kasan_cache_create(struct kmem_cache *cache, static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} -static inline void kasan_record_aux_stack_noalloc(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ diff --git a/include/linux/kcore.h b/include/linux/kcore.h index 86c0f1d18998..9a2fa013c91d 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -20,19 +20,6 @@ struct kcore_list { int type; }; -struct vmcore { - struct list_head list; - unsigned long long paddr; - unsigned long long size; - loff_t offset; -}; - -struct vmcoredd_node { - struct list_head list; /* List of dumps */ - void *buf; /* Buffer containing device's dump */ - unsigned int size; /* Size of the buffer */ -}; - #ifdef CONFIG_PROC_KCORE void __init kclist_add(struct kcore_list *, void *, size_t, int type); diff --git a/include/linux/kdb.h b/include/linux/kdb.h index f6c2ddb16b95..ecbf819deeca 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -104,7 +104,7 @@ extern int kdb_initial_cpu; #define KDB_NOENVVALUE (-6) #define KDB_NOTIMP (-7) #define KDB_ENVFULL (-8) -#define KDB_ENVBUFFULL (-9) +#define KDB_KMALLOCFAILED (-9) #define KDB_TOOMANYBPT (-10) #define KDB_TOOMANYDBREGS (-11) #define KDB_DUPBPT (-12) @@ -140,9 +140,6 @@ extern const char *kdb_diemsg; extern unsigned int kdb_flags; /* Global flags, see kdb_state for per cpu state */ -extern void kdb_save_flags(void); -extern void kdb_restore_flags(void); - #define KDB_FLAG(flag) (kdb_flags & KDB_FLAG_##flag) #define KDB_FLAG_SET(flag) ((void)(kdb_flags |= KDB_FLAG_##flag)) #define KDB_FLAG_CLEAR(flag) ((void)(kdb_flags &= ~KDB_FLAG_##flag)) diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 87c79d076d6d..b5a5f32fdfd1 100644 --- a/include/linux/kernfs.h +++ 
b/include/linux/kernfs.h @@ -147,6 +147,11 @@ enum kernfs_root_flag { * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, + + /* + * Renames must not change the parent node. + */ + KERNFS_ROOT_INVARIANT_PARENT = 0x0010, }; /* type-specific structures for kernfs_node union members */ @@ -199,8 +204,8 @@ struct kernfs_node { * never moved to a different parent, it is safe to access the * parent directly. */ - struct kernfs_node *parent; - const char *name; + struct kernfs_node __rcu *__parent; + const char __rcu *name; struct rb_node rb; @@ -395,7 +400,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); -int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, +int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); @@ -416,6 +421,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); +unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, @@ -514,6 +520,8 @@ kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } +static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ return 0; } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f0e9f8eda7a3..c8971861521a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -68,8 +68,6 @@ extern note_buf_t __percpu *crash_notes; #define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE #endif -#define KEXEC_CORE_NOTE_NAME CRASH_CORE_NOTE_NAME - /* * This structure is used to hold the arguments that are used when loading * kernel binaries. @@ -205,6 +203,15 @@ static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif +#ifndef arch_check_excluded_range +static inline int arch_check_excluded_range(struct kimage *image, + unsigned long start, + unsigned long end) +{ + return 0; +} +#endif + #ifdef CONFIG_KEXEC_SIG #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len); diff --git a/include/linux/key.h b/include/linux/key.h index 074dca3222b9..ba05de8579ec 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,6 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ +#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 76e891ee9e37..5eebbe7a3545 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -257,7 +257,6 @@ extern void kgdb_arch_late(void); * hardware breakpoints. 
* @correct_hw_break: Allow an architecture to specify how to correct the * hardware debug registers. - * @enable_nmi: Manage NMI-triggered entry to KGDB */ struct kgdb_arch { unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE]; @@ -270,8 +269,6 @@ struct kgdb_arch { void (*disable_hw_break)(struct pt_regs *regs); void (*remove_all_hw_break)(void); void (*correct_hw_break)(void); - - void (*enable_nmi)(bool on); }; /** @@ -306,16 +303,6 @@ extern const struct kgdb_arch arch_kgdb_ops; extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); -#ifdef CONFIG_SERIAL_KGDB_NMI -extern int kgdb_register_nmi_console(void); -extern int kgdb_unregister_nmi_console(void); -extern bool kgdb_nmi_poll_knock(void); -#else -static inline int kgdb_register_nmi_console(void) { return 0; } -static inline int kgdb_unregister_nmi_console(void) { return 0; } -static inline bool kgdb_nmi_poll_knock(void) { return true; } -#endif - extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); extern struct kgdb_io *dbg_io_ops; diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index be707748e7ce..150fe2ae1b6b 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -52,8 +52,6 @@ const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); void *kobj_ns_grab_current(enum kobj_ns_type type); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); -const void *kobj_ns_initial(enum kobj_ns_type type); void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/kref.h b/include/linux/kref.h index d32e21a2538c..88e82ab1367c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -46,18 +46,18 @@ static inline void kref_get(struct kref *kref) } /** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the + * kref_put - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. + * Decrement the refcount, and if 0, call @release. The caller may not + * pass NULL or kfree() as the release function. + * + * Return: 1 if this call removed the object, otherwise return 0. Beware, + * if this function returns 0, another caller may have removed the object + * by the time this function returns. The return value is only certain + * if you want to see if the object is definitely released. */ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { @@ -68,17 +68,37 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) return 0; } +/** + * kref_put_mutex - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @mutex: Mutex which protects the release function. 
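To illustrate the kref_put() contract documented above, here is a minimal usage sketch; the demo_obj type, its release function and the helpers around it are hypothetical and only show the intended call pattern, they are not part of this patch.

#include <linux/kref.h>
#include <linux/slab.h>

struct demo_obj {
	struct kref ref;
	/* payload ... */
};

static void demo_release(struct kref *ref)
{
	struct demo_obj *obj = container_of(ref, struct demo_obj, ref);

	kfree(obj);
}

static struct demo_obj *demo_alloc(void)
{
	struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (obj)
		kref_init(&obj->ref);	/* refcount starts at 1 */
	return obj;
}

static void demo_put(struct demo_obj *obj)
{
	/* demo_release() runs only for the call that drops the last reference */
	kref_put(&obj->ref, demo_release);
}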
+ * + * This variant of kref_put() calls the @release function with the @mutex + * held. The @release function will release the mutex. + */ static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), - struct mutex *lock) + struct mutex *mutex) { - if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { + if (refcount_dec_and_mutex_lock(&kref->refcount, mutex)) { release(kref); return 1; } return 0; } +/** + * kref_put_lock - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @lock: Spinlock which protects the release function. + * + * This variant of kref_put() calls the @release function with the @lock + * held. The @release function will release the lock. + */ static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) @@ -94,8 +114,6 @@ static inline int kref_put_lock(struct kref *kref, * kref_get_unless_zero - Increment refcount for object unless it is zero. * @kref: object. * - * Return non-zero if the increment succeeded. Otherwise return 0. - * * This function is intended to simplify locking around refcounting for * objects that can be looked up from a lookup structure, and which are * removed from that lookup structure in the object destructor. @@ -105,6 +123,8 @@ static inline int kref_put_lock(struct kref *kref, * With a lookup followed by a kref_get_unless_zero *with return value check* * locking in the kref_put path can be deferred to the actual removal from * the lookup structure and RCU lookups become trivial. + * + * Return: non-zero if the increment succeeded. Otherwise return 0. */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 6a53ac4885bb..d73095b5cd96 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -93,6 +93,7 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early); long ksm_process_profit(struct mm_struct *); +bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 7fcf29a4e0de..6ea897222af1 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -143,6 +143,7 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); +extern unsigned long simple_strntoul(const char *,char **,unsigned int,size_t); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index b11f53c1ba2e..8d27403888ce 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -85,6 +85,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data, void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); +int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask); int kthread_stop(struct task_struct *k); int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); @@ -186,13 +187,58 @@ extern 
void __kthread_init_worker(struct kthread_worker *worker, int kthread_worker_fn(void *worker_ptr); -__printf(2, 3) -struct kthread_worker * -kthread_create_worker(unsigned int flags, const char namefmt[], ...); +__printf(3, 4) +struct kthread_worker *kthread_create_worker_on_node(unsigned int flags, + int node, + const char namefmt[], ...); + +#define kthread_create_worker(flags, namefmt, ...) \ + kthread_create_worker_on_node(flags, NUMA_NO_NODE, namefmt, ## __VA_ARGS__); + +/** + * kthread_run_worker - create and wake a kthread worker. + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the thread. + * + * Description: Convenient wrapper for kthread_create_worker() followed by + * wake_up_process(). Returns the kthread_worker or ERR_PTR(-ENOMEM). + */ +#define kthread_run_worker(flags, namefmt, ...) \ +({ \ + struct kthread_worker *__kw \ + = kthread_create_worker(flags, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__kw)) \ + wake_up_process(__kw->task); \ + __kw; \ +}) -__printf(3, 4) struct kthread_worker * +struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, - const char namefmt[], ...); + const char namefmt[]); + +/** + * kthread_run_worker_on_cpu - create and wake a cpu bound kthread worker. + * @cpu: CPU number + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. + * + * Description: Convenient wrapper for kthread_create_worker_on_cpu() + * followed by wake_up_process(). Returns the kthread_worker or + * ERR_PTR(-ENOMEM). + */ +static inline struct kthread_worker * +kthread_run_worker_on_cpu(int cpu, unsigned int flags, + const char namefmt[]) +{ + struct kthread_worker *kw; + + kw = kthread_create_worker_on_cpu(cpu, flags, namefmt); + if (!IS_ERR(kw)) + wake_up_process(kw->task); + + return kw; +} bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 3a4e723eae0f..383ed9985802 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -222,6 +222,11 @@ static inline ktime_t ns_to_ktime(u64 ns) return ns; } +static inline ktime_t us_to_ktime(u64 us) +{ + return us * NSEC_PER_USEC; +} + static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b0b38744c4b0..291d49b9bf05 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -255,12 +255,19 @@ union kvm_mmu_notifier_arg { unsigned long attributes; }; +enum kvm_gfn_range_filter { + KVM_FILTER_SHARED = BIT(0), + KVM_FILTER_PRIVATE = BIT(1), +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; union kvm_mmu_notifier_arg arg; + enum kvm_gfn_range_filter attr_filter; bool may_block; + bool lockless; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); @@ -596,7 +603,12 @@ struct kvm_memory_slot { #ifdef CONFIG_KVM_PRIVATE_MEM struct { - struct file __rcu *file; + /* + * Writes protected by kvm->slots_lock. Acquiring a + * reference via kvm_gmem_get_file() is protected by + * either kvm->slots_lock or kvm->srcu. 
+ */ + struct file *file; pgoff_t pgoff; } gmem; #endif @@ -979,9 +991,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ - (atomic_read(&kvm->online_vcpus) - 1)) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + if (atomic_read(&kvm->online_vcpus)) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { @@ -1192,7 +1205,7 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn * -- just change its flags * * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): + * differentiation is the best we can do for kvm_set_memory_region(): */ enum kvm_mr_change { KVM_MR_CREATE, @@ -1201,10 +1214,8 @@ enum kvm_mr_change { KVM_MR_FLAGS_ONLY, }; -int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region2 *mem); -int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region2 *mem); +int kvm_set_internal_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -1605,7 +1616,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); -int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); @@ -1737,7 +1747,6 @@ static inline void kvm_unregister_perf_callbacks(void) {} int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); -void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); @@ -2373,7 +2382,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); diff --git a/include/linux/libata.h b/include/linux/libata.h index c1a85d46eba6..ec3b0c9c2a8c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -88,6 +88,7 @@ enum ata_quirks { __ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */ __ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */ __ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */ + __ATA_QUIRK_NO_LPM_ON_ATI, /* Disable LPM on ATI chipset */ __ATA_QUIRK_NO_ID_DEV_LOG, /* Identify device log missing */ __ATA_QUIRK_NO_LOG_DIR, /* Do not read log directory */ __ATA_QUIRK_NO_FUA, /* Do not use FUA */ @@ -432,6 +433,7 @@ enum { ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), + ATA_QUIRK_NO_LPM_ON_ATI = (1U << __ATA_QUIRK_NO_LPM_ON_ATI), ATA_QUIRK_NO_ID_DEV_LOG = (1U << __ATA_QUIRK_NO_ID_DEV_LOG), ATA_QUIRK_NO_LOG_DIR = (1U << 
__ATA_QUIRK_NO_LOG_DIR), ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), @@ -1199,10 +1201,9 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); -extern int ata_scsi_slave_alloc(struct scsi_device *sdev); -int ata_scsi_device_configure(struct scsi_device *sdev, - struct queue_limits *lim); -extern void ata_scsi_slave_destroy(struct scsi_device *sdev); +extern int ata_scsi_sdev_init(struct scsi_device *sdev); +int ata_scsi_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim); +extern void ata_scsi_sdev_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); extern int ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev, @@ -1301,8 +1302,8 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_port_free(struct ata_port *ap); extern int ata_tport_add(struct device *parent, struct ata_port *ap); extern void ata_tport_delete(struct ata_port *ap); -int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, - struct ata_port *ap); +int ata_sas_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim, + struct ata_port *ap); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1362,7 +1363,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1387,10 +1388,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif @@ -1458,8 +1458,8 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ - .slave_alloc = ata_scsi_slave_alloc, \ - .slave_destroy = ata_scsi_slave_destroy, \ + .sdev_init = ata_scsi_sdev_init, \ + .sdev_destroy = ata_scsi_sdev_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ .max_sectors = ATA_MAX_SECTORS_LBA48 @@ -1467,14 +1467,14 @@ extern const struct attribute_group *ata_common_sdev_groups[]; #define ATA_SUBBASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .can_queue = ATA_DEF_QUEUE, \ - .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .tag_alloc_policy_rr = true, \ + .sdev_configure = ata_scsi_sdev_configure #define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ __ATA_BASE_SHT(drv_name), \ .can_queue = drv_qd, \ - .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .tag_alloc_policy_rr = true, \ + .sdev_configure = ata_scsi_sdev_configure #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h index fc388da6a027..0d68f9d6a6a7 100644 --- a/include/linux/libgcc.h +++ 
b/include/linux/libgcc.h @@ -34,4 +34,8 @@ long long notrace __lshrdi3(long long u, word_type b); long long notrace __muldi3(long long u, long long v); word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b); +#ifdef CONFIG_HAVE_ARCH_LIBGCC_H +#include <asm/libgcc.h> +#endif + #endif /* __ASM_LIBGCC_H */ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5c8865bb59d9..b11660b706c5 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -134,10 +134,6 @@ .size name, .-name #endif -/* If symbol 'name' is treated as a subroutine (gets called, and returns) - * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of - * static analysis tools such as stack depth analyzer. - */ #ifndef ENDPROC /* deprecated, use SYM_FUNC_END */ #define ENDPROC(name) \ diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 05c166811f6b..fe739d35a864 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -91,13 +91,24 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren * @memcg: the cgroup of the sublist to add the item to. * * If the element is already part of a list, this function returns doing - * nothing. Therefore the caller does not need to keep state about whether or - * not the element already belongs in the list and is allowed to lazy update - * it. Note however that this is valid for *a* list, not *this* list. If - * the caller organize itself in a way that elements can be in more than - * one type of list, it is up to the caller to fully remove the item from - * the previous list (with list_lru_del() for instance) before moving it - * to @lru. + * nothing. This means that it is not necessary to keep state about whether or + * not the element already belongs in the list. That said, this logic only + * works if the item is in *this* list. If the item might be in some other + * list, then you cannot rely on this check and you must remove it from the + * other list before trying to insert it. + * + * The lru list consists of many sublists internally; the @nid and @memcg + * parameters are used to determine which sublist to insert the item into. + * It's important to use the right value of @nid and @memcg when deleting the + * item, since it might otherwise get deleted from the wrong sublist. + * + * This also applies when attempting to insert the item multiple times - if + * the item is currently in one sublist and you call list_lru_add() again, you + * must pass the right @nid and @memcg parameters so that the same sublist is + * used. + * + * You must ensure that the memcg is not freed during this call (e.g., with + * rcu or by taking a css refcnt). * * Return: true if the list was updated, false otherwise */ @@ -113,7 +124,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, * memcg of the sublist is determined by @item list_head. This assumption is * valid for slab objects LRU such as dentries, inodes, etc. * - * Return value: true if the list was updated, false otherwise + * Return: true if the list was updated, false otherwise */ bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); @@ -125,8 +136,19 @@ bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); * @memcg: the cgroup of the sublist to delete the item from. * * This function works analogously as list_lru_add() in terms of list - * manipulation. The comments about an element already pertaining to - * a list are also valid for list_lru_del(). + * manipulation. 
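As a usage illustration of the list_lru_add()/list_lru_del() rules spelled out in these comments, here is a minimal sketch; the demo_entry type and demo_lru instance are hypothetical, and demo_lru is assumed to have been set up with list_lru_init() elsewhere.

#include <linux/list_lru.h>

struct demo_entry {
	struct list_head lru;
	int nid;		/* node the entry was allocated on */
};

static struct list_lru demo_lru;	/* initialized with list_lru_init() at boot */

static void demo_entry_make_reclaimable(struct demo_entry *e)
{
	/* @nid plus a NULL memcg selects the node's root sublist */
	list_lru_add(&demo_lru, &e->lru, e->nid, NULL);
}

static void demo_entry_pin(struct demo_entry *e)
{
	/* must match the @nid/@memcg used when the entry was added */
	list_lru_del(&demo_lru, &e->lru, e->nid, NULL);
}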
+ * + * The comments in list_lru_add() about an element already being in a list are + * also valid for list_lru_del(), that is, you can delete an item that has + * already been removed or never been added. However, if the item is in a + * list, it must be in *this* list, and you must pass the right value of @nid + * and @memcg so that the right sublist is used. + * + * You must ensure that the memcg is not freed during this call (e.g., with + * rcu or by taking a css refcnt). When a memcg is deleted, list_lru entries + * are automatically moved to the parent memcg. This is done in a race-free + * way, so during deletion of a memcg both the old and new memcg will resolve + * to the same sublist internally. * * Return: true if the list was updated, false otherwise */ @@ -142,7 +164,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, * memcg of the sublist is determined by @item list_head. This assumption is * valid for slab objects LRU such as dentries, inodes, etc. * - * Return value: true if the list was updated, false otherwise. + * Return: true if the list was updated, false otherwise. */ bool list_lru_del_obj(struct list_lru *lru, struct list_head *item); diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index fa6e8471bd22..248db9b77ee2 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -28,6 +28,7 @@ struct hlist_nulls_node { #define NULLS_MARKER(value) (1UL | (((long)value) << 1)) #define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) +#define HLIST_NULLS_HEAD_INIT(nulls) {.first = (struct hlist_nulls_node *)NULLS_MARKER(nulls)} #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 091dc0b6bdfb..16a2ee4f8310 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -51,6 +51,34 @@ #define local_unlock_irqrestore(lock, flags) \ __local_unlock_irqrestore(lock, flags) +/** + * local_trylock_init - Runtime initialize a lock instance + */ +#define local_trylock_init(lock) __local_trylock_init(lock) + +/** + * local_trylock - Try to acquire a per CPU local lock + * @lock: The lock variable + * + * The function can be used in any context such as NMI or HARDIRQ. Due to + * locking constraints it will _always_ fail to acquire the lock in NMI or + * HARDIRQ context on PREEMPT_RT. + */ +#define local_trylock(lock) __local_trylock(lock) + +/** + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired + * @lock: The lock variable + * @flags: Storage for interrupt flags + * + * The function can be used in any context such as NMI or HARDIRQ. Due to + * locking constraints it will _always_ fail to acquire the lock in NMI or + * HARDIRQ context on PREEMPT_RT. 
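A minimal usage sketch of the trylock variants documented above; the per-CPU demo_pcpu structure and its counter are hypothetical and only demonstrate the calling convention.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct demo_pcpu {
	local_trylock_t lock;
	unsigned long events;
};

static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu) = {
	.lock = INIT_LOCAL_TRYLOCK(lock),
};

/* May be called from any context, including NMI and hard interrupt. */
static bool demo_count_event(void)
{
	/* Fails instead of deadlocking if this CPU already holds the lock. */
	if (!local_trylock(&demo_pcpu.lock))
		return false;

	this_cpu_inc(demo_pcpu.events);
	local_unlock(&demo_pcpu.lock);
	return true;
}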
+ */ +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) + DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), local_unlock(_T)) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 8dd71fbbb6d2..8d5ac16a9b17 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,6 +15,12 @@ typedef struct { #endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ +typedef struct { + local_lock_t llock; + u8 acquired; +} local_trylock_t; + #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ .dep_map = { \ @@ -24,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -31,6 +40,13 @@ static inline void local_lock_acquire(local_lock_t *l) l->owner = current; } +static inline void local_trylock_acquire(local_lock_t *l) +{ + lock_map_acquire_try(&l->dep_map); + DEBUG_LOCKS_WARN_ON(l->owner); + l->owner = current; +} + static inline void local_lock_release(local_lock_t *l) { DEBUG_LOCKS_WARN_ON(l->owner != current); @@ -44,12 +60,15 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -62,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -73,39 +94,105 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + __percpu local_lock_t *: (void)0); \ + local_lock_acquire(l); \ + } while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ + } while (0) + +#define __local_trylock(lock) \ + ({ \ + local_trylock_t *tl; \ + \ + preempt_disable(); \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ + preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ + } \ + !!tl; \ + }) + +#define __local_trylock_irqsave(lock, flags) \ + ({ \ + local_trylock_t *tl; \ + \ + local_irq_save(flags); \ + tl = 
this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ + local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ + } \ + !!tl; \ + }) + +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + __percpu local_lock_t *: (void)0); \ } while (0) #define __local_unlock(lock) \ do { \ - local_lock_release(this_cpu_ptr(lock)); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) #define __local_unlock_irq(lock) \ do { \ - local_lock_release(this_cpu_ptr(lock)); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while (0) #define __local_unlock_irqrestore(lock, flags) \ do { \ - local_lock_release(this_cpu_ptr(lock)); \ + __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) @@ -125,14 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -169,4 +260,26 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) +#define __local_trylock(lock) \ + ({ \ + int __locked; \ + \ + if (in_nmi() | in_hardirq()) { \ + __locked = 0; \ + } else { \ + migrate_disable(); \ + __locked = spin_trylock(this_cpu_ptr((lock))); \ + if (!__locked) \ + migrate_enable(); \ + } \ + __locked; \ + }) + +#define __local_trylock_irqsave(lock, flags) \ + ({ \ + typecheck(unsigned long, flags); \ + flags = 0; \ + __local_trylock(lock); \ + }) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c3a1f78bc884..676721ee878d 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -34,14 +34,25 @@ struct lockref { }; }; -extern void lockref_get(struct lockref *); -extern int lockref_put_return(struct lockref *); -extern int lockref_get_not_zero(struct lockref *); -extern int lockref_put_not_zero(struct lockref *); -extern int lockref_put_or_lock(struct lockref *); - -extern void lockref_mark_dead(struct lockref *); -extern int lockref_get_not_dead(struct lockref *); +/** + * lockref_init - Initialize a lockref + * @lockref: pointer to lockref structure + * + * Initializes @lockref->count to 1. 
+ */ +static inline void lockref_init(struct lockref *lockref) +{ + spin_lock_init(&lockref->lock); + lockref->count = 1; +} + +void lockref_get(struct lockref *lockref); +int lockref_put_return(struct lockref *lockref); +bool lockref_get_not_zero(struct lockref *lockref); +bool lockref_put_or_lock(struct lockref *lockref); + +void lockref_mark_dead(struct lockref *lockref); +bool lockref_get_not_dead(struct lockref *lockref); /* Must be called under spinlock for reliable results */ static inline bool __lockref_is_dead(const struct lockref *l) diff --git a/include/linux/log2.h b/include/linux/log2.h index 9f30d087a128..1366cb688a6d 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -41,7 +41,7 @@ int __ilog2_u64(u64 n) * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ -static inline __attribute__((const)) +static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 97a8b21eb033..382c56a97bba 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -5,7 +5,7 @@ * * Author : Etienne BASSET <etienne.basset@ensta.org> * - * All credits to : Stephen Smalley, <sds@tycho.nsa.gov> + * All credits to : Stephen Smalley * All BUGS to : Etienne BASSET <etienne.basset@ensta.org> */ #ifndef _LSM_COMMON_LOGGING_ @@ -77,6 +77,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_LOCKDOWN 15 #define LSM_AUDIT_DATA_NOTIFICATION 16 #define LSM_AUDIT_DATA_ANONINODE 17 +#define LSM_AUDIT_DATA_NLMSGTYPE 18 union { struct path path; struct dentry *dentry; @@ -98,6 +99,7 @@ struct common_audit_data { struct lsm_ibendport_audit *ibendport; int reason; const char *anonclass; + u16 nlmsg_type; } u; /* this union contains LSM specific data */ union { @@ -116,14 +118,36 @@ struct common_audit_data { #define v4info fam.v4 #define v6info fam.v6 +#ifdef CONFIG_AUDIT + int ipv4_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto); +#if IS_ENABLED(CONFIG_IPV6) int ipv6_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto); +#endif /* IS_ENABLED(CONFIG_IPV6) */ void common_lsm_audit(struct common_audit_data *a, void (*pre_audit)(struct audit_buffer *, void *), void (*post_audit)(struct audit_buffer *, void *)); +void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a); + +#else /* CONFIG_AUDIT */ + +static inline void common_lsm_audit(struct common_audit_data *a, + void (*pre_audit)(struct audit_buffer *, void *), + void (*post_audit)(struct audit_buffer *, void *)) +{ +} + +static inline void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a) +{ +} + +#endif /* CONFIG_AUDIT */ + #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index eb2937599cb0..bf3bbac4e02a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -83,7 +83,7 @@ LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, u32 *ctxlen) + struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) @@ -295,17 +295,16 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) 
LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, - u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, struct lsm_context *cp) LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) +LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, - u32 *ctxlen) +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, + struct lsm_context *cp) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, @@ -427,14 +426,14 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) #endif /* CONFIG_AUDIT */ #ifdef CONFIG_BPF_SYSCALL -LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) +LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, const struct path *path) @@ -446,7 +445,7 @@ LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) #ifdef CONFIG_PERF_EVENTS -LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) +LSM_HOOK(int, 0, perf_event_open, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) @@ -456,6 +455,7 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) +LSM_HOOK(int, 0, uring_allowed, void) #endif /* CONFIG_IO_URING */ LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index b16e15b9587a..ad6042a718b5 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -645,4 +645,10 @@ int LZ4_decompress_safe_usingDict(const char *source, char *dest, int LZ4_decompress_fast_usingDict(const char *source, char *dest, int originalSize, const char *dictStart, int dictSize); +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by 
default */ +#endif + #endif diff --git a/include/linux/lzo.h b/include/linux/lzo.h index e95c7d1092b2..4d30e3624acd 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -24,10 +24,18 @@ int lzo1x_1_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem); +/* Same as above but does not write more than dst_len to dst. */ +int lzo1x_1_compress_safe(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzorle1x_1_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem); +/* Same as above but does not write more than dst_len to dst. */ +int lzorle1x_1_compress_safe(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len); diff --git a/include/linux/mailbox/exynos-message.h b/include/linux/mailbox/exynos-message.h new file mode 100644 index 000000000000..5a9ed5ce2046 --- /dev/null +++ b/include/linux/mailbox/exynos-message.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Exynos mailbox message. + * + * Copyright 2024 Linaro Ltd. + */ + +#ifndef _LINUX_EXYNOS_MESSAGE_H_ +#define _LINUX_EXYNOS_MESSAGE_H_ + +#define EXYNOS_MBOX_CHAN_TYPE_DOORBELL 0 +#define EXYNOS_MBOX_CHAN_TYPE_DATA 1 + +struct exynos_mbox_msg { + unsigned int chan_id; + unsigned int chan_type; +}; + +#endif /* _LINUX_EXYNOS_MESSAGE_H_ */ diff --git a/include/linux/mailbox/mchp-ipc.h b/include/linux/mailbox/mchp-ipc.h new file mode 100644 index 000000000000..f084ac9e291b --- /dev/null +++ b/include/linux/mailbox/mchp-ipc.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + *Copyright (c) 2024 Microchip Technology Inc. All rights reserved. 
+ */ + +#ifndef _LINUX_MCHP_IPC_H_ +#define _LINUX_MCHP_IPC_H_ + +#include <linux/mailbox_controller.h> +#include <linux/types.h> + +struct mchp_ipc_msg { + u32 *buf; + u16 size; +}; + +struct mchp_ipc_sbi_chan { + void *buf_base_tx; + void *buf_base_rx; + void *msg_buf_tx; + void *msg_buf_rx; + phys_addr_t buf_base_tx_addr; + phys_addr_t buf_base_rx_addr; + phys_addr_t msg_buf_tx_addr; + phys_addr_t msg_buf_rx_addr; + int chan_aggregated_irq; + int mp_irq; + int mc_irq; + u32 id; + u32 max_msg_size; +}; + +#endif /* _LINUX_MCHP_IPC_H_ */ diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index a8f0070c7aa9..4c1a91b07de3 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -75,7 +75,6 @@ struct cmdq_pkt { dma_addr_t pa_base; size_t cmd_buf_size; /* command occupied size */ size_t buf_size; /* real buffer size */ - void *cl; }; u8 cmdq_get_shift_pa(struct mbox_chan *chan); diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index 734694912ef7..c6eea9afb943 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -7,8 +7,8 @@ #ifndef __MAILBOX_CLIENT_H #define __MAILBOX_CLIENT_H -#include <linux/of.h> #include <linux/device.h> +#include <linux/of.h> struct mbox_chan; diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 6fee33cb52f5..5fb0b65f45a2 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -3,11 +3,11 @@ #ifndef __MAILBOX_CONTROLLER_H #define __MAILBOX_CONTROLLER_H +#include <linux/completion.h> +#include <linux/device.h> +#include <linux/hrtimer.h> #include <linux/of.h> #include <linux/types.h> -#include <linux/hrtimer.h> -#include <linux/device.h> -#include <linux/completion.h> struct mbox_chan; diff --git a/include/linux/math.h b/include/linux/math.h index f5f18dc3616b..0198c92cbe3e 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -34,6 +34,18 @@ */ #define round_down(x, y) ((x) & ~__round_mask(x, y)) +/** + * DIV_ROUND_UP_POW2 - divide and round up + * @n: numerator + * @d: denominator (must be a power of 2) + * + * Divides @n by @d and rounds up to next multiple of @d (which must be a power + * of 2). Avoids integer overflows that may occur with __KERNEL_DIV_ROUND_UP(). + * Performance is roughly equivalent to __KERNEL_DIV_ROUND_UP(). 
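+ *
+ * Worked example (illustrative): DIV_ROUND_UP_POW2(10, 4) expands to
+ * 10/4 + !!(10 & 3) = 2 + 1 = 3, the same result as DIV_ROUND_UP(10, 4),
+ * while DIV_ROUND_UP_POW2(8, 4) = 8/4 + !!(8 & 3) = 2 + 0 = 2.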
+ */ +#define DIV_ROUND_UP_POW2(n, d) \ + ((n) / (d) + !!((n) & ((d) - 1))) + #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_DOWN_ULL(ll, d) \ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3c3deac57894..e43ff9f980a4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -45,10 +45,7 @@ struct mdio_device { unsigned int reset_deassert_delay; }; -static inline struct mdio_device *to_mdio_device(const struct device *dev) -{ - return container_of(dev, struct mdio_device, dev); -} +#define to_mdio_device(__dev) container_of_const(__dev, struct mdio_device, dev) /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index b38a56a13f39..725fd7727422 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -97,8 +97,6 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf, size_t length, unsigned long timeout); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, - size_t length); ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, unsigned long timeout); ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, @@ -107,8 +105,6 @@ ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf, size_t length, u8 vtag, unsigned long timeout); ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); -ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, - size_t length, u8 *vtag); ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag, unsigned long timeout); @@ -116,7 +112,6 @@ int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, mei_cldev_cb_t notif_cb); -const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index ae4526389261..07584c5e36fb 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -26,11 +26,34 @@ */ #define __sme_set(x) ((x) | sme_me_mask) #define __sme_clr(x) ((x) & ~sme_me_mask) + +#define dma_addr_encrypted(x) __sme_set(x) +#define dma_addr_canonical(x) __sme_clr(x) + #else #define __sme_set(x) (x) #define __sme_clr(x) (x) #endif +/* + * dma_addr_encrypted() and dma_addr_unencrypted() are for converting a given DMA + * address to the respective type of addressing. + * + * dma_addr_canonical() is used to reverse any conversions for encrypted/decrypted + * back to the canonical address. 
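+ *
+ * Illustrative sketch: where sme_me_mask is defined, dma_addr_encrypted(x)
+ * is __sme_set(x), i.e. (x | sme_me_mask), and dma_addr_canonical() strips
+ * the mask again, so dma_addr_canonical(dma_addr_encrypted(x)) == x for any
+ * canonical (mask-free) address; the fallbacks below reduce all three
+ * helpers to identity macros on platforms that do not override them.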
+ */ +#ifndef dma_addr_encrypted +#define dma_addr_encrypted(x) (x) +#endif + +#ifndef dma_addr_unencrypted +#define dma_addr_unencrypted(x) (x) +#endif + +#ifndef dma_addr_canonical +#define dma_addr_canonical(x) (x) +#endif + #endif /* __ASSEMBLY__ */ #endif /* __MEM_ENCRYPT_H__ */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 673d5cae7c81..ef5a1ecc6e59 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -133,7 +133,6 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); -void memblock_free_all(void); void memblock_free(void *ptr, size_t size); void reset_all_zones_managed_pages(void); @@ -378,6 +377,10 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 +/* + * MEMBLOCK_ALLOC_NOLEAKTRACE avoids kmemleak tracing. It implies + * MEMBLOCK_ALLOC_ACCESSIBLE + */ #define MEMBLOCK_ALLOC_NOLEAKTRACE 1 /* We are using top down, so it is safe to use 0 here */ @@ -417,6 +420,12 @@ static __always_inline void *memblock_alloc(phys_addr_t size, phys_addr_t align) MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } +void *__memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align, + const char *func); + +#define memblock_alloc_or_panic(size, align) \ + __memblock_alloc_or_panic(size, align, __func__) + static inline void *memblock_alloc_raw(phys_addr_t size, phys_addr_t align) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5502aa8e138e..53364526d877 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -438,9 +438,7 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) */ static inline bool folio_memcg_charged(struct folio *folio) { - if (folio_memcg_kmem(folio)) - return __folio_objcg(folio) != NULL; - return __folio_memcg(folio) != NULL; + return folio->memcg_data != 0; } /* @@ -620,8 +618,6 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } -void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); - int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** @@ -646,14 +642,11 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } -int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, - long nr_pages); +int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp); int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); - void __mem_cgroup_uncharge(struct folio *folio); /** @@ -677,7 +670,6 @@ static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) __mem_cgroup_uncharge_folios(folios); } -void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_replace_folio(struct folio *old, struct folio *new); void mem_cgroup_migrate(struct folio *old, struct folio *new); @@ -1044,7 +1036,26 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -void split_page_memcg(struct page *head, int old_order, int new_order); +void split_page_memcg(struct page *first, unsigned order); +void folio_split_memcg_refs(struct 
folio *folio, unsigned old_order, + unsigned new_order); + +static inline u64 cgroup_id_from_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + u64 id; + + if (mem_cgroup_disabled()) + return 0; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (!memcg) + memcg = root_mem_cgroup; + id = cgroup_id(memcg->css.cgroup); + rcu_read_unlock(); + return id; +} #else /* CONFIG_MEMCG */ @@ -1135,21 +1146,15 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } -static inline void mem_cgroup_commit_charge(struct folio *folio, - struct mem_cgroup *memcg) -{ -} - static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } -static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, - gfp_t gfp, long nr_pages) +static inline int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp) { - return 0; + return 0; } static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, @@ -1158,10 +1163,6 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) -{ -} - static inline void mem_cgroup_uncharge(struct folio *folio) { } @@ -1170,11 +1171,6 @@ static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { } -static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, - unsigned int nr_pages) -{ -} - static inline void mem_cgroup_replace_folio(struct folio *old, struct folio *new) { @@ -1463,8 +1459,18 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void split_page_memcg(struct page *head, int old_order, int new_order) +static inline void split_page_memcg(struct page *first, unsigned order) +{ +} + +static inline void folio_split_memcg_refs(struct folio *folio, + unsigned old_order, unsigned new_order) +{ +} + +static inline u64 cgroup_id_from_mm(struct mm_struct *mm) { + return 0; } #endif /* CONFIG_MEMCG */ @@ -1841,6 +1847,9 @@ static inline void mem_cgroup_exit_user_fault(void) current->in_user_fault = 0; } +void memcg1_swapout(struct folio *folio, swp_entry_t entry); +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages); + #else /* CONFIG_MEMCG_V1 */ static inline unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -1868,6 +1877,14 @@ static inline void mem_cgroup_exit_user_fault(void) { } +static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry) +{ +} + +static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages) +{ +} + #endif /* CONFIG_MEMCG_V1 */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/memfd.h b/include/linux/memfd.h index d437e3070850..246daadbfde8 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -7,7 +7,14 @@ #ifdef CONFIG_MEMFD_CREATE extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg); struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); -unsigned int *memfd_file_seals_ptr(struct file *file); +/* + * Check for any existing seals on mmap, return an error if access is denied due + * to sealing, or 0 otherwise. + * + * We also update VMA flags if appropriate by manipulating the VMA flags pointed + * to by vm_flags_ptr. 
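+ *
+ * Illustrative example of the intended use: a MAP_SHARED, PROT_WRITE mmap()
+ * of a memfd sealed with F_SEAL_WRITE is expected to fail this check, while
+ * a read-only mapping passes (returning 0), possibly with writability bits
+ * such as VM_MAYWRITE cleared through vm_flags_ptr when future writes are
+ * sealed.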
+ */ +int memfd_check_seals_mmap(struct file *file, unsigned long *vm_flags_ptr); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { @@ -17,19 +24,11 @@ static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) { return ERR_PTR(-EINVAL); } - -static inline unsigned int *memfd_file_seals_ptr(struct file *file) +static inline int memfd_check_seals_mmap(struct file *file, + unsigned long *vm_flags_ptr) { - return NULL; + return 0; } #endif -/* Retrieve memfd seals associated with the file, if any. */ -static inline unsigned int memfd_file_seals(struct file *file) -{ - unsigned int *sealsp = memfd_file_seals_ptr(file); - - return sealsp ? *sealsp : 0; -} - #endif /* __LINUX_MEMFD_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index c0afee5d126e..12daa6ec7d09 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -25,7 +25,7 @@ /** * struct memory_group - a logical group of memory blocks * @nid: The node id for all memory blocks inside the memory group. - * @blocks: List of all memory blocks belonging to this memory group. + * @memory_blocks: List of all memory blocks belonging to this memory group. * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this * memory group. * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this diff --git a/include/linux/memory/ti-aemif.h b/include/linux/memory/ti-aemif.h new file mode 100644 index 000000000000..da94a9d985e7 --- /dev/null +++ b/include/linux/memory/ti-aemif.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __MEMORY_TI_AEMIF_H +#define __MEMORY_TI_AEMIF_H + +/** + * struct aemif_cs_timings: structure to hold CS timing configuration + * values are expressed in number of clock cycles - 1 + * @ta: minimum turn around time + * @rhold: read hold width + * @rstrobe: read strobe width + * @rsetup: read setup width + * @whold: write hold width + * @wstrobe: write strobe width + * @wsetup: write setup width + */ +struct aemif_cs_timings { + u32 ta; + u32 rhold; + u32 rstrobe; + u32 rsetup; + u32 whold; + u32 wstrobe; + u32 wsetup; +}; + +struct aemif_device; + +int aemif_set_cs_timings(struct aemif_device *aemif, u8 cs, struct aemif_cs_timings *timings); +int aemif_check_cs_timings(struct aemif_cs_timings *timings); + +#endif // __MEMORY_TI_AEMIF_H diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index b27ddce5d324..eaac5ae8c05c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -144,8 +144,6 @@ extern u64 max_mem_size; extern int mhp_online_type_from_str(const char *str); -/* Default online_type (MMOP_*) when new memory blocks are added. */ -extern int mhp_default_online_type; /* If movable_node boot option specified */ extern bool movable_node_enabled; static inline bool movable_node_is_enabled(void) @@ -303,6 +301,9 @@ static inline void __remove_memory(u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_MEMORY_HOTPLUG +/* Default online_type (MMOP_*) when new memory blocks are added. 
*/ +extern int mhp_get_default_online_type(void); +extern void mhp_set_default_online_type(int online_type); extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 3f7143ade32c..4aa151914eab 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PRIVATE; + page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; } static inline bool folio_is_device_private(const struct folio *folio) @@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; + page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } static inline bool is_device_coherent_page(const struct page *page) { return is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_COHERENT; + page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; } static inline bool folio_is_device_coherent(const struct folio *folio) @@ -187,6 +187,17 @@ static inline bool folio_is_device_coherent(const struct folio *folio) return is_device_coherent_page(&folio->page); } +static inline bool is_fsdax_page(const struct page *page) +{ + return is_zone_device_page(page) && + page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; +} + +static inline bool folio_is_fsdax(const struct folio *folio) +{ + return is_fsdax_page(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index c3df0e615fbf..3c5aecf1d4b5 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -137,6 +137,7 @@ enum axp20x_variants { #define AXP717_IRQ2_STATE 0x4a #define AXP717_IRQ3_STATE 0x4b #define AXP717_IRQ4_STATE 0x4c +#define AXP717_TS_PIN_CFG 0x50 #define AXP717_ICC_CHG_SET 0x62 #define AXP717_ITERM_CHG_SET 0x63 #define AXP717_CV_CHG_SET 0x64 diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index e8bcad641d8c..faeea7abd688 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -72,7 +72,7 @@ struct mfd_cell { int (*resume)(struct platform_device *dev); /* platform data passed to the sub devices drivers */ - void *platform_data; + const void *platform_data; size_t pdata_size; /* Matches ACPI */ diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index 76feb3a7066d..9cb2fc2938ce 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -93,6 +93,8 @@ struct da9052 { int chip_irq; + int fault_log; + /* SOC I/O transfer related fixes for DA9052/53 */ int (*fix_io) (struct da9052 *da9052, unsigned char reg); }; diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index dd0fc891b228..98567623c9df 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -215,7 +215,7 @@ struct prcmu_fw_version { static inline void prcmu_early_init(void) { - return db8500_prcmu_early_init(); + db8500_prcmu_early_init(); } static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk, @@ -302,7 +302,7 @@ 
static inline int prcmu_request_ape_opp_100_voltage(bool enable) static inline void prcmu_system_reset(u16 reset_code) { - return db8500_prcmu_system_reset(reset_code); + db8500_prcmu_system_reset(reset_code); } static inline u16 prcmu_get_reset_code(void) @@ -314,7 +314,7 @@ int prcmu_ac_wake_req(void); void prcmu_ac_sleep_req(void); static inline void prcmu_modem_reset(void) { - return db8500_prcmu_modem_reset(); + db8500_prcmu_modem_reset(); } static inline bool prcmu_is_ac_wake_requested(void) diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h index ffde195e12b7..ea51b1cdca5a 100644 --- a/include/linux/mfd/ezx-pcap.h +++ b/include/linux/mfd/ezx-pcap.h @@ -31,7 +31,6 @@ int ezx_pcap_set_bits(struct pcap_chip *, u8, u32, u32); int pcap_to_irq(struct pcap_chip *, int); int irq_to_pcap(struct pcap_chip *, int); int pcap_adc_async(struct pcap_chip *, u8, u32, u8[], void *, void *); -int pcap_adc_sync(struct pcap_chip *, u8, u32, u8[], u16[]); void pcap_set_ts_bits(struct pcap_chip *, u32); #define PCAP_SECOND_PORT 1 diff --git a/include/linux/mfd/lp3943.h b/include/linux/mfd/lp3943.h index 020a339f96e8..402f01078fcc 100644 --- a/include/linux/mfd/lp3943.h +++ b/include/linux/mfd/lp3943.h @@ -11,7 +11,6 @@ #define __MFD_LP3943_H__ #include <linux/gpio.h> -#include <linux/pwm.h> #include <linux/regmap.h> /* Registers */ diff --git a/include/linux/mfd/max77693-common.h b/include/linux/mfd/max77693-common.h index a5bce099f1ed..ec2e1b2dceb8 100644 --- a/include/linux/mfd/max77693-common.h +++ b/include/linux/mfd/max77693-common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * Common data shared between Maxim 77693 and 77843 drivers + * Common data shared between Maxim 77693, 77705 and 77843 drivers * * Copyright (C) 2015 Samsung Electronics */ @@ -11,6 +11,7 @@ enum max77693_types { TYPE_MAX77693_UNKNOWN, TYPE_MAX77693, + TYPE_MAX77705, TYPE_MAX77843, TYPE_MAX77693_NUM, @@ -32,6 +33,7 @@ struct max77693_dev { struct regmap *regmap_muic; struct regmap *regmap_haptic; /* Only MAX77693 */ struct regmap *regmap_chg; /* Only MAX77843 */ + struct regmap *regmap_leds; /* Only MAX77705 */ struct regmap_irq_chip_data *irq_data_led; struct regmap_irq_chip_data *irq_data_topsys; diff --git a/include/linux/mfd/max77705-private.h b/include/linux/mfd/max77705-private.h new file mode 100644 index 000000000000..214de7feeb8c --- /dev/null +++ b/include/linux/mfd/max77705-private.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Maxim MAX77705 definitions. + * + * Copyright (C) 2015 Samsung Electronics, Inc. 
+ * Copyright (C) 2025 Dzmitry Sankouski <dsankouski@gmail.com> + */ + +#ifndef __LINUX_MFD_MAX77705_PRIV_H +#define __LINUX_MFD_MAX77705_PRIV_H + +#define MAX77705_SRC_IRQ_CHG BIT(0) +#define MAX77705_SRC_IRQ_TOP BIT(1) +#define MAX77705_SRC_IRQ_FG BIT(2) +#define MAX77705_SRC_IRQ_USBC BIT(3) +#define MAX77705_SRC_IRQ_ALL (MAX77705_SRC_IRQ_CHG | MAX77705_SRC_IRQ_TOP | \ + MAX77705_SRC_IRQ_FG | MAX77705_SRC_IRQ_USBC) + +/* MAX77705_PMIC_REG_PMICREV register */ +#define MAX77705_VERSION_SHIFT 3 +#define MAX77705_REVISION_MASK GENMASK(2, 0) +#define MAX77705_VERSION_MASK GENMASK(7, MAX77705_VERSION_SHIFT) +/* MAX77705_PMIC_REG_MAINCTRL1 register */ +#define MAX77705_MAINCTRL1_BIASEN_SHIFT 7 +#define MAX77705_MAINCTRL1_BIASEN_MASK BIT(MAX77705_MAINCTRL1_BIASEN_SHIFT) +/* MAX77705_PMIC_REG_MCONFIG2 (haptics) register */ +#define MAX77705_CONFIG2_MEN_SHIFT 6 +#define MAX77705_CONFIG2_MODE_SHIFT 7 +#define MAX77705_CONFIG2_HTYP_SHIFT 5 +/* MAX77705_PMIC_REG_SYSTEM_INT_MASK register */ +#define MAX77705_SYSTEM_IRQ_BSTEN_INT BIT(3) +#define MAX77705_SYSTEM_IRQ_SYSUVLO_INT BIT(4) +#define MAX77705_SYSTEM_IRQ_SYSOVLO_INT BIT(5) +#define MAX77705_SYSTEM_IRQ_TSHDN_INT BIT(6) +#define MAX77705_SYSTEM_IRQ_TM_INT BIT(7) +/* MAX77705_RGBLED_REG_LEDEN register */ +#define MAX77705_RGBLED_EN_WIDTH 2 +/* MAX77705_RGBLED_REG_LEDBLNK register */ +#define MAX77705_RGB_DELAY_100_STEP_LIM 500 +#define MAX77705_RGB_DELAY_100_STEP_COUNT 4 +#define MAX77705_RGB_DELAY_100_STEP 100 +#define MAX77705_RGB_DELAY_250_STEP_LIM 3250 +#define MAX77705_RGB_DELAY_250_STEP 250 +#define MAX77705_RGB_DELAY_500_STEP 500 +#define MAX77705_RGB_DELAY_500_STEP_COUNT 10 +#define MAX77705_RGB_DELAY_500_STEP_LIM 5000 +#define MAX77705_RGB_DELAY_1000_STEP_LIM 8000 +#define MAX77705_RGB_DELAY_1000_STEP_COUNT 13 +#define MAX77705_RGB_DELAY_1000_STEP 1000 +#define MAX77705_RGB_DELAY_2000_STEP 2000 +#define MAX77705_RGB_DELAY_2000_STEP_COUNT 13 +#define MAX77705_RGB_DELAY_2000_STEP_LIM 12000 + +enum max77705_hw_rev { + MAX77705_PASS1 = 1, + MAX77705_PASS2, + MAX77705_PASS3 +}; + +enum max77705_reg { + MAX77705_PMIC_REG_PMICID1 = 0x00, + MAX77705_PMIC_REG_PMICREV = 0x01, + MAX77705_PMIC_REG_MAINCTRL1 = 0x02, + MAX77705_PMIC_REG_BSTOUT_MASK = 0x03, + MAX77705_PMIC_REG_FORCE_EN_MASK = 0x08, + MAX77705_PMIC_REG_MCONFIG = 0x10, + MAX77705_PMIC_REG_MCONFIG2 = 0x11, + MAX77705_PMIC_REG_INTSRC = 0x22, + MAX77705_PMIC_REG_INTSRC_MASK = 0x23, + MAX77705_PMIC_REG_SYSTEM_INT = 0x24, + MAX77705_PMIC_REG_RESERVED_25 = 0x25, + MAX77705_PMIC_REG_SYSTEM_INT_MASK = 0x26, + MAX77705_PMIC_REG_RESERVED_27 = 0x27, + MAX77705_PMIC_REG_RESERVED_28 = 0x28, + MAX77705_PMIC_REG_RESERVED_29 = 0x29, + MAX77705_PMIC_REG_BOOSTCONTROL1 = 0x4C, + MAX77705_PMIC_REG_BOOSTCONTROL2 = 0x4F, + MAX77705_PMIC_REG_SW_RESET = 0x50, + MAX77705_PMIC_REG_USBC_RESET = 0x51, + + MAX77705_PMIC_REG_END +}; + +enum max77705_chg_reg { + MAX77705_CHG_REG_BASE = 0xB0, + MAX77705_CHG_REG_INT = 0, + MAX77705_CHG_REG_INT_MASK, + MAX77705_CHG_REG_INT_OK, + MAX77705_CHG_REG_DETAILS_00, + MAX77705_CHG_REG_DETAILS_01, + MAX77705_CHG_REG_DETAILS_02, + MAX77705_CHG_REG_DTLS_03, + MAX77705_CHG_REG_CNFG_00, + MAX77705_CHG_REG_CNFG_01, + MAX77705_CHG_REG_CNFG_02, + MAX77705_CHG_REG_CNFG_03, + MAX77705_CHG_REG_CNFG_04, + MAX77705_CHG_REG_CNFG_05, + MAX77705_CHG_REG_CNFG_06, + MAX77705_CHG_REG_CNFG_07, + MAX77705_CHG_REG_CNFG_08, + MAX77705_CHG_REG_CNFG_09, + MAX77705_CHG_REG_CNFG_10, + MAX77705_CHG_REG_CNFG_11, + + MAX77705_CHG_REG_CNFG_12, + MAX77705_CHG_REG_CNFG_13, + MAX77705_CHG_REG_CNFG_14, + 
MAX77705_CHG_REG_SAFEOUT_CTRL +}; + +enum max77705_fuelgauge_reg { + STATUS_REG = 0x00, + VALRT_THRESHOLD_REG = 0x01, + TALRT_THRESHOLD_REG = 0x02, + SALRT_THRESHOLD_REG = 0x03, + REMCAP_REP_REG = 0x05, + SOCREP_REG = 0x06, + TEMPERATURE_REG = 0x08, + VCELL_REG = 0x09, + TIME_TO_EMPTY_REG = 0x11, + FULLSOCTHR_REG = 0x13, + CURRENT_REG = 0x0A, + AVG_CURRENT_REG = 0x0B, + SOCMIX_REG = 0x0D, + SOCAV_REG = 0x0E, + REMCAP_MIX_REG = 0x0F, + FULLCAP_REG = 0x10, + RFAST_REG = 0x15, + AVR_TEMPERATURE_REG = 0x16, + CYCLES_REG = 0x17, + DESIGNCAP_REG = 0x18, + AVR_VCELL_REG = 0x19, + TIME_TO_FULL_REG = 0x20, + CONFIG_REG = 0x1D, + ICHGTERM_REG = 0x1E, + REMCAP_AV_REG = 0x1F, + FULLCAP_NOM_REG = 0x23, + LEARN_CFG_REG = 0x28, + FILTER_CFG_REG = 0x29, + MISCCFG_REG = 0x2B, + QRTABLE20_REG = 0x32, + FULLCAP_REP_REG = 0x35, + RCOMP_REG = 0x38, + VEMPTY_REG = 0x3A, + FSTAT_REG = 0x3D, + DISCHARGE_THRESHOLD_REG = 0x40, + QRTABLE30_REG = 0x42, + ISYS_REG = 0x43, + DQACC_REG = 0x45, + DPACC_REG = 0x46, + AVGISYS_REG = 0x4B, + QH_REG = 0x4D, + VSYS_REG = 0xB1, + TALRTTH2_REG = 0xB2, + VBYP_REG = 0xB3, + CONFIG2_REG = 0xBB, + IIN_REG = 0xD0, + OCV_REG = 0xEE, + VFOCV_REG = 0xFB, + VFSOC_REG = 0xFF, + + MAX77705_FG_END +}; + +enum max77705_led_reg { + MAX77705_RGBLED_REG_BASE = 0x30, + MAX77705_RGBLED_REG_LEDEN = 0, + MAX77705_RGBLED_REG_LED0BRT, + MAX77705_RGBLED_REG_LED1BRT, + MAX77705_RGBLED_REG_LED2BRT, + MAX77705_RGBLED_REG_LED3BRT, + MAX77705_RGBLED_REG_LEDRMP, + MAX77705_RGBLED_REG_LEDBLNK, + MAX77705_LED_REG_END +}; + +enum max77705_charger_battery_state { + MAX77705_BATTERY_NOBAT, + MAX77705_BATTERY_PREQUALIFICATION, + MAX77705_BATTERY_DEAD, + MAX77705_BATTERY_GOOD, + MAX77705_BATTERY_LOWVOLTAGE, + MAX77705_BATTERY_OVERVOLTAGE, + MAX77705_BATTERY_RESERVED +}; + +enum max77705_charger_charge_type { + MAX77705_CHARGER_CONSTANT_CURRENT = 1, + MAX77705_CHARGER_CONSTANT_VOLTAGE, + MAX77705_CHARGER_END_OF_CHARGE, + MAX77705_CHARGER_DONE +}; + +#endif /* __LINUX_MFD_MAX77705_PRIV_H */ diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h index a10cd6945232..f70eea0f2264 100644 --- a/include/linux/mfd/max8997-private.h +++ b/include/linux/mfd/max8997-private.h @@ -397,7 +397,6 @@ enum max8997_types { }; extern int max8997_irq_init(struct max8997_dev *max8997); -extern void max8997_irq_exit(struct max8997_dev *max8997); extern int max8997_irq_resume(struct max8997_dev *max8997); extern int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h index 068ae1c0f0e8..27883af44f87 100644 --- a/include/linux/mfd/mt6397/rtc.h +++ b/include/linux/mfd/mt6397/rtc.h @@ -60,11 +60,6 @@ #define RTC_PDN2 0x002e #define RTC_PDN2_PWRON_ALARM BIT(4) -#define RTC_MIN_YEAR 1968 -#define RTC_BASE_YEAR 1900 -#define RTC_NUM_YEARS 128 -#define RTC_MIN_YEAR_OFFSET (RTC_MIN_YEAR - RTC_BASE_YEAR) - #define MTK_RTC_POLL_DELAY_US 10 #define MTK_RTC_POLL_TIMEOUT (jiffies_to_usecs(HZ)) diff --git a/include/linux/mfd/pcf50633/adc.h b/include/linux/mfd/pcf50633/adc.h deleted file mode 100644 index 6a81896d4889..000000000000 --- a/include/linux/mfd/pcf50633/adc.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * adc.h -- Driver for NXP PCF50633 ADC - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. 
- */ - -#ifndef __LINUX_MFD_PCF50633_ADC_H -#define __LINUX_MFD_PCF50633_ADC_H - -#include <linux/mfd/pcf50633/core.h> -#include <linux/platform_device.h> - -/* ADC Registers */ -#define PCF50633_REG_ADCC3 0x52 -#define PCF50633_REG_ADCC2 0x53 -#define PCF50633_REG_ADCC1 0x54 -#define PCF50633_REG_ADCS1 0x55 -#define PCF50633_REG_ADCS2 0x56 -#define PCF50633_REG_ADCS3 0x57 - -#define PCF50633_ADCC1_ADCSTART 0x01 -#define PCF50633_ADCC1_RES_8BIT 0x02 -#define PCF50633_ADCC1_RES_10BIT 0x00 -#define PCF50633_ADCC1_AVERAGE_NO 0x00 -#define PCF50633_ADCC1_AVERAGE_4 0x04 -#define PCF50633_ADCC1_AVERAGE_8 0x08 -#define PCF50633_ADCC1_AVERAGE_16 0x0c -#define PCF50633_ADCC1_MUX_BATSNS_RES 0x00 -#define PCF50633_ADCC1_MUX_BATSNS_SUBTR 0x10 -#define PCF50633_ADCC1_MUX_ADCIN2_RES 0x20 -#define PCF50633_ADCC1_MUX_ADCIN2_SUBTR 0x30 -#define PCF50633_ADCC1_MUX_BATTEMP 0x60 -#define PCF50633_ADCC1_MUX_ADCIN1 0x70 -#define PCF50633_ADCC1_AVERAGE_MASK 0x0c -#define PCF50633_ADCC1_ADCMUX_MASK 0xf0 - -#define PCF50633_ADCC2_RATIO_NONE 0x00 -#define PCF50633_ADCC2_RATIO_BATTEMP 0x01 -#define PCF50633_ADCC2_RATIO_ADCIN1 0x02 -#define PCF50633_ADCC2_RATIO_BOTH 0x03 -#define PCF50633_ADCC2_RATIOSETTL_100US 0x04 - -#define PCF50633_ADCC3_ACCSW_EN 0x01 -#define PCF50633_ADCC3_NTCSW_EN 0x04 -#define PCF50633_ADCC3_RES_DIV_TWO 0x10 -#define PCF50633_ADCC3_RES_DIV_THREE 0x00 - -#define PCF50633_ADCS3_REF_NTCSW 0x00 -#define PCF50633_ADCS3_REF_ACCSW 0x10 -#define PCF50633_ADCS3_REF_2V0 0x20 -#define PCF50633_ADCS3_REF_VISA 0x30 -#define PCF50633_ADCS3_REF_2V0_2 0x70 -#define PCF50633_ADCS3_ADCRDY 0x80 - -#define PCF50633_ADCS3_ADCDAT1L_MASK 0x03 -#define PCF50633_ADCS3_ADCDAT2L_MASK 0x0c -#define PCF50633_ADCS3_ADCDAT2L_SHIFT 2 -#define PCF50633_ASCS3_REF_MASK 0x70 - -extern int -pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg, - void (*callback)(struct pcf50633 *, void *, int), - void *callback_param); -extern int -pcf50633_adc_sync_read(struct pcf50633 *pcf, int mux, int avg); - -#endif /* __LINUX_PCF50633_ADC_H */ diff --git a/include/linux/mfd/pcf50633/backlight.h b/include/linux/mfd/pcf50633/backlight.h deleted file mode 100644 index fd4a4f8d6c13..000000000000 --- a/include/linux/mfd/pcf50633/backlight.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de> - * PCF50633 backlight device driver - */ - -#ifndef __LINUX_MFD_PCF50633_BACKLIGHT -#define __LINUX_MFD_PCF50633_BACKLIGHT - -/* -* @default_brightness: Backlight brightness is initialized to this value -* -* Brightness to be used after the driver has been probed. -* Valid range 0-63. -* -* @default_brightness_limit: The actual brightness is limited by this value -* -* Brightness limit to be used after the driver has been probed. This is useful -* when it is not known how much power is available for the backlight during -* probe. -* Valid range 0-63. Can be changed later with pcf50633_bl_set_brightness_limit. -* -* @ramp_time: Display ramp time when changing brightness -* -* When changing the backlights brightness the change is not instant, instead -* it fades smooth from one state to another. This value specifies how long -* the fade should take. The lower the value the higher the fade time. 
-* Valid range 0-255 -*/ -struct pcf50633_bl_platform_data { - unsigned int default_brightness; - unsigned int default_brightness_limit; - uint8_t ramp_time; -}; - - -struct pcf50633; - -int pcf50633_bl_set_brightness_limit(struct pcf50633 *pcf, unsigned int limit); - -#endif - diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h index 539f27f8bd89..42d2b0e4884e 100644 --- a/include/linux/mfd/pcf50633/core.h +++ b/include/linux/mfd/pcf50633/core.h @@ -15,7 +15,6 @@ #include <linux/regulator/machine.h> #include <linux/pm.h> #include <linux/power_supply.h> -#include <linux/mfd/pcf50633/backlight.h> struct pcf50633; struct regmap; @@ -42,8 +41,6 @@ struct pcf50633_platform_data { void (*force_shutdown)(struct pcf50633 *); u8 resumers[5]; - - struct pcf50633_bl_platform_data *backlight_data; }; struct pcf50633_irq { diff --git a/include/linux/mfd/pcf50633/gpio.h b/include/linux/mfd/pcf50633/gpio.h deleted file mode 100644 index f589e35795f1..000000000000 --- a/include/linux/mfd/pcf50633/gpio.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * gpio.h -- GPIO driver for NXP PCF50633 - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. - */ - -#ifndef __LINUX_MFD_PCF50633_GPIO_H -#define __LINUX_MFD_PCF50633_GPIO_H - -#include <linux/mfd/pcf50633/core.h> - -#define PCF50633_GPIO1 1 -#define PCF50633_GPIO2 2 -#define PCF50633_GPIO3 3 -#define PCF50633_GPO 4 - -#define PCF50633_REG_GPIO1CFG 0x14 -#define PCF50633_REG_GPIO2CFG 0x15 -#define PCF50633_REG_GPIO3CFG 0x16 -#define PCF50633_REG_GPOCFG 0x17 - -#define PCF50633_GPOCFG_GPOSEL_MASK 0x07 - -enum pcf50633_reg_gpocfg { - PCF50633_GPOCFG_GPOSEL_0 = 0x00, - PCF50633_GPOCFG_GPOSEL_LED_NFET = 0x01, - PCF50633_GPOCFG_GPOSEL_SYSxOK = 0x02, - PCF50633_GPOCFG_GPOSEL_CLK32K = 0x03, - PCF50633_GPOCFG_GPOSEL_ADAPUSB = 0x04, - PCF50633_GPOCFG_GPOSEL_USBxOK = 0x05, - PCF50633_GPOCFG_GPOSEL_ACTPH4 = 0x06, - PCF50633_GPOCFG_GPOSEL_1 = 0x07, - PCF50633_GPOCFG_GPOSEL_INVERSE = 0x08, -}; - -int pcf50633_gpio_set(struct pcf50633 *pcf, int gpio, u8 val); -u8 pcf50633_gpio_get(struct pcf50633 *pcf, int gpio); - -int pcf50633_gpio_invert_set(struct pcf50633 *, int gpio, int invert); -int pcf50633_gpio_invert_get(struct pcf50633 *pcf, int gpio); - -int pcf50633_gpio_power_supply_set(struct pcf50633 *, - int gpio, int regulator, int on); -#endif /* __LINUX_MFD_PCF50633_GPIO_H */ - - diff --git a/include/linux/mfd/pcf50633/mbc.h b/include/linux/mfd/pcf50633/mbc.h deleted file mode 100644 index fa5cb9256d99..000000000000 --- a/include/linux/mfd/pcf50633/mbc.h +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mbc.h -- Driver for NXP PCF50633 Main Battery Charger - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. 
- */ - -#ifndef __LINUX_MFD_PCF50633_MBC_H -#define __LINUX_MFD_PCF50633_MBC_H - -#include <linux/mfd/pcf50633/core.h> -#include <linux/platform_device.h> - -#define PCF50633_REG_MBCC1 0x43 -#define PCF50633_REG_MBCC2 0x44 -#define PCF50633_REG_MBCC3 0x45 -#define PCF50633_REG_MBCC4 0x46 -#define PCF50633_REG_MBCC5 0x47 -#define PCF50633_REG_MBCC6 0x48 -#define PCF50633_REG_MBCC7 0x49 -#define PCF50633_REG_MBCC8 0x4a -#define PCF50633_REG_MBCS1 0x4b -#define PCF50633_REG_MBCS2 0x4c -#define PCF50633_REG_MBCS3 0x4d - -enum pcf50633_reg_mbcc1 { - PCF50633_MBCC1_CHGENA = 0x01, /* Charger enable */ - PCF50633_MBCC1_AUTOSTOP = 0x02, - PCF50633_MBCC1_AUTORES = 0x04, /* automatic resume */ - PCF50633_MBCC1_RESUME = 0x08, /* explicit resume cmd */ - PCF50633_MBCC1_RESTART = 0x10, /* restart charging */ - PCF50633_MBCC1_PREWDTIME_60M = 0x20, /* max. precharging time */ - PCF50633_MBCC1_WDTIME_1H = 0x00, - PCF50633_MBCC1_WDTIME_2H = 0x40, - PCF50633_MBCC1_WDTIME_4H = 0x80, - PCF50633_MBCC1_WDTIME_6H = 0xc0, -}; -#define PCF50633_MBCC1_WDTIME_MASK 0xc0 - -enum pcf50633_reg_mbcc2 { - PCF50633_MBCC2_VBATCOND_2V7 = 0x00, - PCF50633_MBCC2_VBATCOND_2V85 = 0x01, - PCF50633_MBCC2_VBATCOND_3V0 = 0x02, - PCF50633_MBCC2_VBATCOND_3V15 = 0x03, - PCF50633_MBCC2_VMAX_4V = 0x00, - PCF50633_MBCC2_VMAX_4V20 = 0x28, - PCF50633_MBCC2_VRESDEBTIME_64S = 0x80, /* debounce time (32/64sec) */ -}; - -enum pcf50633_reg_mbcc7 { - PCF50633_MBCC7_USB_100mA = 0x00, - PCF50633_MBCC7_USB_500mA = 0x01, - PCF50633_MBCC7_USB_1000mA = 0x02, - PCF50633_MBCC7_USB_SUSPEND = 0x03, - PCF50633_MBCC7_BATTEMP_EN = 0x04, - PCF50633_MBCC7_BATSYSIMAX_1A6 = 0x00, - PCF50633_MBCC7_BATSYSIMAX_1A8 = 0x40, - PCF50633_MBCC7_BATSYSIMAX_2A0 = 0x80, - PCF50633_MBCC7_BATSYSIMAX_2A2 = 0xc0, -}; -#define PCF50633_MBCC7_USB_MASK 0x03 - -enum pcf50633_reg_mbcc8 { - PCF50633_MBCC8_USBENASUS = 0x10, -}; - -enum pcf50633_reg_mbcs1 { - PCF50633_MBCS1_USBPRES = 0x01, - PCF50633_MBCS1_USBOK = 0x02, - PCF50633_MBCS1_ADAPTPRES = 0x04, - PCF50633_MBCS1_ADAPTOK = 0x08, - PCF50633_MBCS1_TBAT_OK = 0x00, - PCF50633_MBCS1_TBAT_ABOVE = 0x10, - PCF50633_MBCS1_TBAT_BELOW = 0x20, - PCF50633_MBCS1_TBAT_UNDEF = 0x30, - PCF50633_MBCS1_PREWDTEXP = 0x40, - PCF50633_MBCS1_WDTEXP = 0x80, -}; - -enum pcf50633_reg_mbcs2_mbcmod { - PCF50633_MBCS2_MBC_PLAY = 0x00, - PCF50633_MBCS2_MBC_USB_PRE = 0x01, - PCF50633_MBCS2_MBC_USB_PRE_WAIT = 0x02, - PCF50633_MBCS2_MBC_USB_FAST = 0x03, - PCF50633_MBCS2_MBC_USB_FAST_WAIT = 0x04, - PCF50633_MBCS2_MBC_USB_SUSPEND = 0x05, - PCF50633_MBCS2_MBC_ADP_PRE = 0x06, - PCF50633_MBCS2_MBC_ADP_PRE_WAIT = 0x07, - PCF50633_MBCS2_MBC_ADP_FAST = 0x08, - PCF50633_MBCS2_MBC_ADP_FAST_WAIT = 0x09, - PCF50633_MBCS2_MBC_BAT_FULL = 0x0a, - PCF50633_MBCS2_MBC_HALT = 0x0b, -}; -#define PCF50633_MBCS2_MBC_MASK 0x0f -enum pcf50633_reg_mbcs2_chgstat { - PCF50633_MBCS2_CHGS_NONE = 0x00, - PCF50633_MBCS2_CHGS_ADAPTER = 0x10, - PCF50633_MBCS2_CHGS_USB = 0x20, - PCF50633_MBCS2_CHGS_BOTH = 0x30, -}; -#define PCF50633_MBCS2_RESSTAT_AUTO 0x40 - -enum pcf50633_reg_mbcs3 { - PCF50633_MBCS3_USBLIM_PLAY = 0x01, - PCF50633_MBCS3_USBLIM_CGH = 0x02, - PCF50633_MBCS3_TLIM_PLAY = 0x04, - PCF50633_MBCS3_TLIM_CHG = 0x08, - PCF50633_MBCS3_ILIM = 0x10, /* 1: Ibat > Icutoff */ - PCF50633_MBCS3_VLIM = 0x20, /* 1: Vbat == Vmax */ - PCF50633_MBCS3_VBATSTAT = 0x40, /* 1: Vbat > Vbatcond */ - PCF50633_MBCS3_VRES = 0x80, /* 1: Vbat > Vth(RES) */ -}; - -#define PCF50633_MBCC2_VBATCOND_MASK 0x03 -#define PCF50633_MBCC2_VMAX_MASK 0x3c - -/* Charger status */ -#define PCF50633_MBC_USB_ONLINE 0x01 -#define 
PCF50633_MBC_USB_ACTIVE 0x02 -#define PCF50633_MBC_ADAPTER_ONLINE 0x04 -#define PCF50633_MBC_ADAPTER_ACTIVE 0x08 - -int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma); - -int pcf50633_mbc_get_status(struct pcf50633 *); -int pcf50633_mbc_get_usb_online_status(struct pcf50633 *); - -#endif - diff --git a/include/linux/mfd/pcf50633/pmic.h b/include/linux/mfd/pcf50633/pmic.h deleted file mode 100644 index eac0c3d8e984..000000000000 --- a/include/linux/mfd/pcf50633/pmic.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_MFD_PCF50633_PMIC_H -#define __LINUX_MFD_PCF50633_PMIC_H - -#include <linux/mfd/pcf50633/core.h> -#include <linux/platform_device.h> - -#define PCF50633_REG_AUTOOUT 0x1a -#define PCF50633_REG_AUTOENA 0x1b -#define PCF50633_REG_AUTOCTL 0x1c -#define PCF50633_REG_AUTOMXC 0x1d -#define PCF50633_REG_DOWN1OUT 0x1e -#define PCF50633_REG_DOWN1ENA 0x1f -#define PCF50633_REG_DOWN1CTL 0x20 -#define PCF50633_REG_DOWN1MXC 0x21 -#define PCF50633_REG_DOWN2OUT 0x22 -#define PCF50633_REG_DOWN2ENA 0x23 -#define PCF50633_REG_DOWN2CTL 0x24 -#define PCF50633_REG_DOWN2MXC 0x25 -#define PCF50633_REG_MEMLDOOUT 0x26 -#define PCF50633_REG_MEMLDOENA 0x27 -#define PCF50633_REG_LDO1OUT 0x2d -#define PCF50633_REG_LDO1ENA 0x2e -#define PCF50633_REG_LDO2OUT 0x2f -#define PCF50633_REG_LDO2ENA 0x30 -#define PCF50633_REG_LDO3OUT 0x31 -#define PCF50633_REG_LDO3ENA 0x32 -#define PCF50633_REG_LDO4OUT 0x33 -#define PCF50633_REG_LDO4ENA 0x34 -#define PCF50633_REG_LDO5OUT 0x35 -#define PCF50633_REG_LDO5ENA 0x36 -#define PCF50633_REG_LDO6OUT 0x37 -#define PCF50633_REG_LDO6ENA 0x38 -#define PCF50633_REG_HCLDOOUT 0x39 -#define PCF50633_REG_HCLDOENA 0x3a -#define PCF50633_REG_HCLDOOVL 0x40 - -enum pcf50633_regulator_enable { - PCF50633_REGULATOR_ON = 0x01, - PCF50633_REGULATOR_ON_GPIO1 = 0x02, - PCF50633_REGULATOR_ON_GPIO2 = 0x04, - PCF50633_REGULATOR_ON_GPIO3 = 0x08, -}; -#define PCF50633_REGULATOR_ON_MASK 0x0f - -enum pcf50633_regulator_phase { - PCF50633_REGULATOR_ACTPH1 = 0x00, - PCF50633_REGULATOR_ACTPH2 = 0x10, - PCF50633_REGULATOR_ACTPH3 = 0x20, - PCF50633_REGULATOR_ACTPH4 = 0x30, -}; -#define PCF50633_REGULATOR_ACTPH_MASK 0x30 - -enum pcf50633_regulator_id { - PCF50633_REGULATOR_AUTO, - PCF50633_REGULATOR_DOWN1, - PCF50633_REGULATOR_DOWN2, - PCF50633_REGULATOR_LDO1, - PCF50633_REGULATOR_LDO2, - PCF50633_REGULATOR_LDO3, - PCF50633_REGULATOR_LDO4, - PCF50633_REGULATOR_LDO5, - PCF50633_REGULATOR_LDO6, - PCF50633_REGULATOR_HCLDO, - PCF50633_REGULATOR_MEMLDO, -}; -#endif - diff --git a/include/linux/mfd/qnap-mcu.h b/include/linux/mfd/qnap-mcu.h new file mode 100644 index 000000000000..8d48c212fd44 --- /dev/null +++ b/include/linux/mfd/qnap-mcu.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Core definitions for QNAP MCU MFD driver. 
+ * Copyright (C) 2024 Heiko Stuebner <heiko@sntech.de> + */ + +#ifndef _LINUX_QNAP_MCU_H_ +#define _LINUX_QNAP_MCU_H_ + +struct qnap_mcu; + +struct qnap_mcu_variant { + u32 baud_rate; + int num_drives; + int fan_pwm_min; + int fan_pwm_max; + bool usb_led; +}; + +int qnap_mcu_exec(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size, + u8 *reply_data, size_t reply_data_size); +int qnap_mcu_exec_with_ack(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size); + +#endif /* _LINUX_QNAP_MCU_H_ */ diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 750274d41fc0..f35314458fd2 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -44,6 +44,7 @@ enum sec_device_type { S2MPS14X, S2MPS15X, S2MPU02, + S2MPU05, }; /** diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h index 3fd2775eb9bb..978f7af66f74 100644 --- a/include/linux/mfd/samsung/irq.h +++ b/include/linux/mfd/samsung/irq.h @@ -150,6 +150,50 @@ enum s2mpu02_irq { /* Masks for interrupts are the same as in s2mps11 */ #define S2MPS14_IRQ_TSD_MASK (1 << 2) +enum s2mpu05_irq { + S2MPU05_IRQ_PWRONF, + S2MPU05_IRQ_PWRONR, + S2MPU05_IRQ_JIGONBF, + S2MPU05_IRQ_JIGONBR, + S2MPU05_IRQ_ACOKF, + S2MPU05_IRQ_ACOKR, + S2MPU05_IRQ_PWRON1S, + S2MPU05_IRQ_MRB, + + S2MPU05_IRQ_RTC60S, + S2MPU05_IRQ_RTCA1, + S2MPU05_IRQ_RTCA0, + S2MPU05_IRQ_SMPL, + S2MPU05_IRQ_RTC1S, + S2MPU05_IRQ_WTSR, + + S2MPU05_IRQ_INT120C, + S2MPU05_IRQ_INT140C, + S2MPU05_IRQ_TSD, + + S2MPU05_IRQ_NR, +}; + +#define S2MPU05_IRQ_PWRONF_MASK BIT(0) +#define S2MPU05_IRQ_PWRONR_MASK BIT(1) +#define S2MPU05_IRQ_JIGONBF_MASK BIT(2) +#define S2MPU05_IRQ_JIGONBR_MASK BIT(3) +#define S2MPU05_IRQ_ACOKF_MASK BIT(4) +#define S2MPU05_IRQ_ACOKR_MASK BIT(5) +#define S2MPU05_IRQ_PWRON1S_MASK BIT(6) +#define S2MPU05_IRQ_MRB_MASK BIT(7) + +#define S2MPU05_IRQ_RTC60S_MASK BIT(0) +#define S2MPU05_IRQ_RTCA1_MASK BIT(1) +#define S2MPU05_IRQ_RTCA0_MASK BIT(2) +#define S2MPU05_IRQ_SMPL_MASK BIT(3) +#define S2MPU05_IRQ_RTC1S_MASK BIT(4) +#define S2MPU05_IRQ_WTSR_MASK BIT(5) + +#define S2MPU05_IRQ_INT120C_MASK BIT(0) +#define S2MPU05_IRQ_INT140C_MASK BIT(1) +#define S2MPU05_IRQ_TSD_MASK BIT(2) + enum s5m8767_irq { S5M8767_IRQ_PWRR, S5M8767_IRQ_PWRF, diff --git a/include/linux/mfd/samsung/s2mpu05.h b/include/linux/mfd/samsung/s2mpu05.h new file mode 100644 index 000000000000..fcdb6c8adb03 --- /dev/null +++ b/include/linux/mfd/samsung/s2mpu05.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2015 Samsung Electronics Co., Ltd + * Copyright (c) 2025 Kaustabh Chakraborty <kauschluss@disroot.org> + */ + +#ifndef __LINUX_MFD_S2MPU05_H +#define __LINUX_MFD_S2MPU05_H + +/* S2MPU05 registers */ +enum S2MPU05_reg { + S2MPU05_REG_ID, + S2MPU05_REG_INT1, + S2MPU05_REG_INT2, + S2MPU05_REG_INT3, + S2MPU05_REG_INT1M, + S2MPU05_REG_INT2M, + S2MPU05_REG_INT3M, + S2MPU05_REG_ST1, + S2MPU05_REG_ST2, + S2MPU05_REG_PWRONSRC, + S2MPU05_REG_OFFSRC, + S2MPU05_REG_BU_CHG, + S2MPU05_REG_RTC_BUF, + S2MPU05_REG_CTRL1, + S2MPU05_REG_CTRL2, + S2MPU05_REG_ETC_TEST, + S2MPU05_REG_OTP_ADRL, + S2MPU05_REG_OTP_ADRH, + S2MPU05_REG_OTP_DATA, + S2MPU05_REG_MON1SEL, + S2MPU05_REG_MON2SEL, + S2MPU05_REG_CTRL3, + S2MPU05_REG_ETC_OTP, + S2MPU05_REG_UVLO, + S2MPU05_REG_TIME_CTRL1, + S2MPU05_REG_TIME_CTRL2, + S2MPU05_REG_B1CTRL1, + S2MPU05_REG_B1CTRL2, + S2MPU05_REG_B2CTRL1, + S2MPU05_REG_B2CTRL2, + S2MPU05_REG_B2CTRL3, + S2MPU05_REG_B2CTRL4, + S2MPU05_REG_B3CTRL1, + S2MPU05_REG_B3CTRL2, + S2MPU05_REG_B3CTRL3, 
+ S2MPU05_REG_B4CTRL1, + S2MPU05_REG_B4CTRL2, + S2MPU05_REG_B5CTRL1, + S2MPU05_REG_B5CTRL2, + S2MPU05_REG_BUCK_RAMP, + S2MPU05_REG_LDO_DVS1, + S2MPU05_REG_LDO_DVS9, + S2MPU05_REG_LDO_DVS10, + S2MPU05_REG_L1CTRL, + S2MPU05_REG_L2CTRL, + S2MPU05_REG_L3CTRL, + S2MPU05_REG_L4CTRL, + S2MPU05_REG_L5CTRL, + S2MPU05_REG_L6CTRL, + S2MPU05_REG_L7CTRL, + S2MPU05_REG_L8CTRL, + S2MPU05_REG_L9CTRL1, + S2MPU05_REG_L9CTRL2, + S2MPU05_REG_L10CTRL, + S2MPU05_REG_L11CTRL1, + S2MPU05_REG_L11CTRL2, + S2MPU05_REG_L12CTRL, + S2MPU05_REG_L13CTRL, + S2MPU05_REG_L14CTRL, + S2MPU05_REG_L15CTRL, + S2MPU05_REG_L16CTRL, + S2MPU05_REG_L17CTRL1, + S2MPU05_REG_L17CTRL2, + S2MPU05_REG_L18CTRL1, + S2MPU05_REG_L18CTRL2, + S2MPU05_REG_L19CTRL, + S2MPU05_REG_L20CTRL, + S2MPU05_REG_L21CTRL, + S2MPU05_REG_L22CTRL, + S2MPU05_REG_L23CTRL, + S2MPU05_REG_L24CTRL, + S2MPU05_REG_L25CTRL, + S2MPU05_REG_L26CTRL, + S2MPU05_REG_L27CTRL, + S2MPU05_REG_L28CTRL, + S2MPU05_REG_L29CTRL, + S2MPU05_REG_L30CTRL, + S2MPU05_REG_L31CTRL, + S2MPU05_REG_L32CTRL, + S2MPU05_REG_L33CTRL, + S2MPU05_REG_L34CTRL, + S2MPU05_REG_L35CTRL, + S2MPU05_REG_LDO_DSCH1, + S2MPU05_REG_LDO_DSCH2, + S2MPU05_REG_LDO_DSCH3, + S2MPU05_REG_LDO_DSCH4, + S2MPU05_REG_LDO_DSCH5, + S2MPU05_REG_LDO_CTRL1, + S2MPU05_REG_LDO_CTRL2, + S2MPU05_REG_TCXO_CTRL, + S2MPU05_REG_SELMIF, +}; + +/* S2MPU05 regulator ids */ +enum S2MPU05_regulators { + S2MPU05_LDO1, + S2MPU05_LDO2, + S2MPU05_LDO3, + S2MPU05_LDO4, + S2MPU05_LDO5, + S2MPU05_LDO6, + S2MPU05_LDO7, + S2MPU05_LDO8, + S2MPU05_LDO9, + S2MPU05_LDO10, + S2MPU05_LDO11, + S2MPU05_LDO12, + S2MPU05_LDO13, + S2MPU05_LDO14, + S2MPU05_LDO15, + S2MPU05_LDO16, + S2MPU05_LDO17, + S2MPU05_LDO18, + S2MPU05_LDO19, + S2MPU05_LDO20, + S2MPU05_LDO21, + S2MPU05_LDO22, + S2MPU05_LDO23, + S2MPU05_LDO24, + S2MPU05_LDO25, + S2MPU05_LDO26, + S2MPU05_LDO27, + S2MPU05_LDO28, + S2MPU05_LDO29, + S2MPU05_LDO30, + S2MPU05_LDO31, + S2MPU05_LDO32, + S2MPU05_LDO33, + S2MPU05_LDO34, + S2MPU05_LDO35, + S2MPU05_BUCK1, + S2MPU05_BUCK2, + S2MPU05_BUCK3, + S2MPU05_BUCK4, + S2MPU05_BUCK5, + + S2MPU05_REGULATOR_MAX, +}; + +#define S2MPU05_SW_ENABLE_MASK 0x03 + +#define S2MPU05_ENABLE_TIME_LDO 128 +#define S2MPU05_ENABLE_TIME_BUCK1 110 +#define S2MPU05_ENABLE_TIME_BUCK2 110 +#define S2MPU05_ENABLE_TIME_BUCK3 110 +#define S2MPU05_ENABLE_TIME_BUCK4 150 +#define S2MPU05_ENABLE_TIME_BUCK5 150 + +#define S2MPU05_LDO_MIN1 800000 +#define S2MPU05_LDO_MIN2 1800000 +#define S2MPU05_LDO_MIN3 400000 +#define S2MPU05_LDO_STEP1 12500 +#define S2MPU05_LDO_STEP2 25000 + +#define S2MPU05_BUCK_MIN1 400000 +#define S2MPU05_BUCK_MIN2 600000 +#define S2MPU05_BUCK_STEP1 6250 +#define S2MPU05_BUCK_STEP2 12500 + +#define S2MPU05_RAMP_DELAY 12000 /* uV/uS */ + +#define S2MPU05_ENABLE_SHIFT 6 +#define S2MPU05_ENABLE_MASK (0x03 << S2MPU05_ENABLE_SHIFT) + +#define S2MPU05_LDO_VSEL_MASK 0x3F +#define S2MPU05_BUCK_VSEL_MASK 0xFF +#define S2MPU05_LDO_N_VOLTAGES (S2MPU05_LDO_VSEL_MASK + 1) +#define S2MPU05_BUCK_N_VOLTAGES (S2MPU05_BUCK_VSEL_MASK + 1) + +#define S2MPU05_PMIC_EN_SHIFT 6 + +#endif /* __LINUX_MFD_S2MPU05_H */ diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h deleted file mode 100644 index 2001ca5c44a9..000000000000 --- a/include/linux/mfd/sta2x11-mfd.h +++ /dev/null @@ -1,506 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2009-2011 Wind River Systems, Inc. - * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini) - * - * The STMicroelectronics ConneXt (STA2X11) chip has several unrelated - * functions in one PCI endpoint functions. 
This driver simply - * registers the platform devices in this iomemregion and exports a few - * functions to access common registers - */ - -#ifndef __STA2X11_MFD_H -#define __STA2X11_MFD_H -#include <linux/types.h> -#include <linux/pci.h> - -enum sta2x11_mfd_plat_dev { - sta2x11_sctl = 0, - sta2x11_gpio, - sta2x11_scr, - sta2x11_time, - sta2x11_apbreg, - sta2x11_apb_soc_regs, - sta2x11_vic, - sta2x11_n_mfd_plat_devs, -}; - -#define STA2X11_MFD_SCTL_NAME "sta2x11-sctl" -#define STA2X11_MFD_GPIO_NAME "sta2x11-gpio" -#define STA2X11_MFD_SCR_NAME "sta2x11-scr" -#define STA2X11_MFD_TIME_NAME "sta2x11-time" -#define STA2X11_MFD_APBREG_NAME "sta2x11-apbreg" -#define STA2X11_MFD_APB_SOC_REGS_NAME "sta2x11-apb-soc-regs" -#define STA2X11_MFD_VIC_NAME "sta2x11-vic" - -extern u32 -__sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev); - -/* - * The MFD PCI block includes the GPIO peripherals and other register blocks. - * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".) - */ -#define GSTA_GPIO_PER_BLOCK 32 -#define GSTA_NR_BLOCKS 4 -#define GSTA_NR_GPIO (GSTA_GPIO_PER_BLOCK * GSTA_NR_BLOCKS) - -/* Pinconfig is set by the board definition: altfunc, pull-up, pull-down */ -struct sta2x11_gpio_pdata { - unsigned pinconfig[GSTA_NR_GPIO]; -}; - -/* Macros below lifted from sh_pfc.h, with minor differences */ -#define PINMUX_TYPE_NONE 0 -#define PINMUX_TYPE_FUNCTION 1 -#define PINMUX_TYPE_OUTPUT_LOW 2 -#define PINMUX_TYPE_OUTPUT_HIGH 3 -#define PINMUX_TYPE_INPUT 4 -#define PINMUX_TYPE_INPUT_PULLUP 5 -#define PINMUX_TYPE_INPUT_PULLDOWN 6 - -/* Give names to GPIO pins, like PXA does, taken from the manual */ -#define STA2X11_GPIO0 0 -#define STA2X11_GPIO1 1 -#define STA2X11_GPIO2 2 -#define STA2X11_GPIO3 3 -#define STA2X11_GPIO4 4 -#define STA2X11_GPIO5 5 -#define STA2X11_GPIO6 6 -#define STA2X11_GPIO7 7 -#define STA2X11_GPIO8_RGBOUT_RED7 8 -#define STA2X11_GPIO9_RGBOUT_RED6 9 -#define STA2X11_GPIO10_RGBOUT_RED5 10 -#define STA2X11_GPIO11_RGBOUT_RED4 11 -#define STA2X11_GPIO12_RGBOUT_RED3 12 -#define STA2X11_GPIO13_RGBOUT_RED2 13 -#define STA2X11_GPIO14_RGBOUT_RED1 14 -#define STA2X11_GPIO15_RGBOUT_RED0 15 -#define STA2X11_GPIO16_RGBOUT_GREEN7 16 -#define STA2X11_GPIO17_RGBOUT_GREEN6 17 -#define STA2X11_GPIO18_RGBOUT_GREEN5 18 -#define STA2X11_GPIO19_RGBOUT_GREEN4 19 -#define STA2X11_GPIO20_RGBOUT_GREEN3 20 -#define STA2X11_GPIO21_RGBOUT_GREEN2 21 -#define STA2X11_GPIO22_RGBOUT_GREEN1 22 -#define STA2X11_GPIO23_RGBOUT_GREEN0 23 -#define STA2X11_GPIO24_RGBOUT_BLUE7 24 -#define STA2X11_GPIO25_RGBOUT_BLUE6 25 -#define STA2X11_GPIO26_RGBOUT_BLUE5 26 -#define STA2X11_GPIO27_RGBOUT_BLUE4 27 -#define STA2X11_GPIO28_RGBOUT_BLUE3 28 -#define STA2X11_GPIO29_RGBOUT_BLUE2 29 -#define STA2X11_GPIO30_RGBOUT_BLUE1 30 -#define STA2X11_GPIO31_RGBOUT_BLUE0 31 -#define STA2X11_GPIO32_RGBOUT_VSYNCH 32 -#define STA2X11_GPIO33_RGBOUT_HSYNCH 33 -#define STA2X11_GPIO34_RGBOUT_DEN 34 -#define STA2X11_GPIO35_ETH_CRS_DV 35 -#define STA2X11_GPIO36_ETH_TXD1 36 -#define STA2X11_GPIO37_ETH_TXD0 37 -#define STA2X11_GPIO38_ETH_TX_EN 38 -#define STA2X11_GPIO39_MDIO 39 -#define STA2X11_GPIO40_ETH_REF_CLK 40 -#define STA2X11_GPIO41_ETH_RXD1 41 -#define STA2X11_GPIO42_ETH_RXD0 42 -#define STA2X11_GPIO43_MDC 43 -#define STA2X11_GPIO44_CAN_TX 44 -#define STA2X11_GPIO45_CAN_RX 45 -#define STA2X11_GPIO46_MLB_DAT 46 -#define STA2X11_GPIO47_MLB_SIG 47 -#define STA2X11_GPIO48_SPI0_CLK 48 -#define STA2X11_GPIO49_SPI0_TXD 49 -#define STA2X11_GPIO50_SPI0_RXD 50 -#define STA2X11_GPIO51_SPI0_FRM 51 -#define 
STA2X11_GPIO52_SPI1_CLK 52 -#define STA2X11_GPIO53_SPI1_TXD 53 -#define STA2X11_GPIO54_SPI1_RXD 54 -#define STA2X11_GPIO55_SPI1_FRM 55 -#define STA2X11_GPIO56_SPI2_CLK 56 -#define STA2X11_GPIO57_SPI2_TXD 57 -#define STA2X11_GPIO58_SPI2_RXD 58 -#define STA2X11_GPIO59_SPI2_FRM 59 -#define STA2X11_GPIO60_I2C0_SCL 60 -#define STA2X11_GPIO61_I2C0_SDA 61 -#define STA2X11_GPIO62_I2C1_SCL 62 -#define STA2X11_GPIO63_I2C1_SDA 63 -#define STA2X11_GPIO64_I2C2_SCL 64 -#define STA2X11_GPIO65_I2C2_SDA 65 -#define STA2X11_GPIO66_I2C3_SCL 66 -#define STA2X11_GPIO67_I2C3_SDA 67 -#define STA2X11_GPIO68_MSP0_RCK 68 -#define STA2X11_GPIO69_MSP0_RXD 69 -#define STA2X11_GPIO70_MSP0_RFS 70 -#define STA2X11_GPIO71_MSP0_TCK 71 -#define STA2X11_GPIO72_MSP0_TXD 72 -#define STA2X11_GPIO73_MSP0_TFS 73 -#define STA2X11_GPIO74_MSP0_SCK 74 -#define STA2X11_GPIO75_MSP1_CK 75 -#define STA2X11_GPIO76_MSP1_RXD 76 -#define STA2X11_GPIO77_MSP1_FS 77 -#define STA2X11_GPIO78_MSP1_TXD 78 -#define STA2X11_GPIO79_MSP2_CK 79 -#define STA2X11_GPIO80_MSP2_RXD 80 -#define STA2X11_GPIO81_MSP2_FS 81 -#define STA2X11_GPIO82_MSP2_TXD 82 -#define STA2X11_GPIO83_MSP3_CK 83 -#define STA2X11_GPIO84_MSP3_RXD 84 -#define STA2X11_GPIO85_MSP3_FS 85 -#define STA2X11_GPIO86_MSP3_TXD 86 -#define STA2X11_GPIO87_MSP4_CK 87 -#define STA2X11_GPIO88_MSP4_RXD 88 -#define STA2X11_GPIO89_MSP4_FS 89 -#define STA2X11_GPIO90_MSP4_TXD 90 -#define STA2X11_GPIO91_MSP5_CK 91 -#define STA2X11_GPIO92_MSP5_RXD 92 -#define STA2X11_GPIO93_MSP5_FS 93 -#define STA2X11_GPIO94_MSP5_TXD 94 -#define STA2X11_GPIO95_SDIO3_DAT3 95 -#define STA2X11_GPIO96_SDIO3_DAT2 96 -#define STA2X11_GPIO97_SDIO3_DAT1 97 -#define STA2X11_GPIO98_SDIO3_DAT0 98 -#define STA2X11_GPIO99_SDIO3_CLK 99 -#define STA2X11_GPIO100_SDIO3_CMD 100 -#define STA2X11_GPIO101 101 -#define STA2X11_GPIO102 102 -#define STA2X11_GPIO103 103 -#define STA2X11_GPIO104 104 -#define STA2X11_GPIO105_SDIO2_DAT3 105 -#define STA2X11_GPIO106_SDIO2_DAT2 106 -#define STA2X11_GPIO107_SDIO2_DAT1 107 -#define STA2X11_GPIO108_SDIO2_DAT0 108 -#define STA2X11_GPIO109_SDIO2_CLK 109 -#define STA2X11_GPIO110_SDIO2_CMD 110 -#define STA2X11_GPIO111 111 -#define STA2X11_GPIO112 112 -#define STA2X11_GPIO113 113 -#define STA2X11_GPIO114 114 -#define STA2X11_GPIO115_SDIO1_DAT3 115 -#define STA2X11_GPIO116_SDIO1_DAT2 116 -#define STA2X11_GPIO117_SDIO1_DAT1 117 -#define STA2X11_GPIO118_SDIO1_DAT0 118 -#define STA2X11_GPIO119_SDIO1_CLK 119 -#define STA2X11_GPIO120_SDIO1_CMD 120 -#define STA2X11_GPIO121 121 -#define STA2X11_GPIO122 122 -#define STA2X11_GPIO123 123 -#define STA2X11_GPIO124 124 -#define STA2X11_GPIO125_UART2_TXD 125 -#define STA2X11_GPIO126_UART2_RXD 126 -#define STA2X11_GPIO127_UART3_TXD 127 - -/* - * The APB bridge has its own registers, needed by our users as well. - * They are accessed with the following read/mask/write function. 
- */ -static inline u32 -sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apbreg); -} - -/* CAN and MLB */ -#define APBREG_BSR 0x00 /* Bridge Status Reg */ -#define APBREG_PAER 0x08 /* Peripherals Address Error Reg */ -#define APBREG_PWAC 0x20 /* Peripheral Write Access Control reg */ -#define APBREG_PRAC 0x40 /* Peripheral Read Access Control reg */ -#define APBREG_PCG 0x60 /* Peripheral Clock Gating Reg */ -#define APBREG_PUR 0x80 /* Peripheral Under Reset Reg */ -#define APBREG_EMU_PCG 0xA0 /* Emulator Peripheral Clock Gating Reg */ - -#define APBREG_CAN (1 << 1) -#define APBREG_MLB (1 << 3) - -/* SARAC */ -#define APBREG_BSR_SARAC 0x100 /* Bridge Status Reg */ -#define APBREG_PAER_SARAC 0x108 /* Peripherals Address Error Reg */ -#define APBREG_PWAC_SARAC 0x120 /* Peripheral Write Access Control reg */ -#define APBREG_PRAC_SARAC 0x140 /* Peripheral Read Access Control reg */ -#define APBREG_PCG_SARAC 0x160 /* Peripheral Clock Gating Reg */ -#define APBREG_PUR_SARAC 0x180 /* Peripheral Under Reset Reg */ -#define APBREG_EMU_PCG_SARAC 0x1A0 /* Emulator Peripheral Clock Gating Reg */ - -#define APBREG_SARAC (1 << 2) - -/* - * The system controller has its own registers. Some of these are accessed - * by out users as well, using the following read/mask/write/function - */ -static inline -u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_sctl); -} - -#define SCTL_SCCTL 0x00 /* System controller control register */ -#define SCTL_ARMCFG 0x04 /* ARM configuration register */ -#define SCTL_SCPLLCTL 0x08 /* PLL control status register */ - -#define SCTL_SCPLLCTL_AUDIO_PLL_PD BIT(1) -#define SCTL_SCPLLCTL_FRAC_CONTROL BIT(3) -#define SCTL_SCPLLCTL_STRB_BYPASS BIT(6) -#define SCTL_SCPLLCTL_STRB_INPUT BIT(8) - -#define SCTL_SCPLLFCTRL 0x0c /* PLL frequency control register */ - -#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_MASK 0xff -#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_SHIFT 10 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_MASK 7 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_SHIFT 21 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_MASK 7 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_SHIFT 18 -#define SCTL_SCPLLFCTRL_DITHER_DISABLE_MASK 0x03 -#define SCTL_SCPLLFCTRL_DITHER_DISABLE_SHIFT 4 - - -#define SCTL_SCRESFRACT 0x10 /* PLL fractional input register */ - -#define SCTL_SCRESFRACT_MASK 0x0000ffff - - -#define SCTL_SCRESCTRL1 0x14 /* Peripheral reset control 1 */ -#define SCTL_SCRESXTRL2 0x18 /* Peripheral reset control 2 */ -#define SCTL_SCPEREN0 0x1c /* Peripheral clock enable register 0 */ -#define SCTL_SCPEREN1 0x20 /* Peripheral clock enable register 1 */ -#define SCTL_SCPEREN2 0x24 /* Peripheral clock enable register 2 */ -#define SCTL_SCGRST 0x28 /* Peripheral global reset */ -#define SCTL_SCPCIECSBRST 0x2c /* PCIe PAB CSB reset status register */ -#define SCTL_SCPCIPMCR1 0x30 /* PCI power management control 1 */ -#define SCTL_SCPCIPMCR2 0x34 /* PCI power management control 2 */ -#define SCTL_SCPCIPMSR1 0x38 /* PCI power management status 1 */ -#define SCTL_SCPCIPMSR2 0x3c /* PCI power management status 2 */ -#define SCTL_SCPCIPMSR3 0x40 /* PCI power management status 3 */ -#define SCTL_SCINTREN 0x44 /* Interrupt enable */ -#define SCTL_SCRISR 0x48 /* RAW interrupt status */ -#define SCTL_SCCLKSTAT0 0x4c /* Peripheral clocks status 0 */ -#define SCTL_SCCLKSTAT1 0x50 /* Peripheral clocks status 1 */ -#define SCTL_SCCLKSTAT2 0x54 /* Peripheral clocks status 2 */ 
-#define SCTL_SCRSTSTA 0x58 /* Reset status register */ - -#define SCTL_SCRESCTRL1_USB_PHY_POR (1 << 0) -#define SCTL_SCRESCTRL1_USB_OTG (1 << 1) -#define SCTL_SCRESCTRL1_USB_HRST (1 << 2) -#define SCTL_SCRESCTRL1_USB_PHY_HOST (1 << 3) -#define SCTL_SCRESCTRL1_SATAII (1 << 4) -#define SCTL_SCRESCTRL1_VIP (1 << 5) -#define SCTL_SCRESCTRL1_PER_MMC0 (1 << 6) -#define SCTL_SCRESCTRL1_PER_MMC1 (1 << 7) -#define SCTL_SCRESCTRL1_PER_GPIO0 (1 << 8) -#define SCTL_SCRESCTRL1_PER_GPIO1 (1 << 9) -#define SCTL_SCRESCTRL1_PER_GPIO2 (1 << 10) -#define SCTL_SCRESCTRL1_PER_GPIO3 (1 << 11) -#define SCTL_SCRESCTRL1_PER_MTU0 (1 << 12) -#define SCTL_SCRESCTRL1_KER_SPI0 (1 << 13) -#define SCTL_SCRESCTRL1_KER_SPI1 (1 << 14) -#define SCTL_SCRESCTRL1_KER_SPI2 (1 << 15) -#define SCTL_SCRESCTRL1_KER_MCI0 (1 << 16) -#define SCTL_SCRESCTRL1_KER_MCI1 (1 << 17) -#define SCTL_SCRESCTRL1_PRE_HSI2C0 (1 << 18) -#define SCTL_SCRESCTRL1_PER_HSI2C1 (1 << 19) -#define SCTL_SCRESCTRL1_PER_HSI2C2 (1 << 20) -#define SCTL_SCRESCTRL1_PER_HSI2C3 (1 << 21) -#define SCTL_SCRESCTRL1_PER_MSP0 (1 << 22) -#define SCTL_SCRESCTRL1_PER_MSP1 (1 << 23) -#define SCTL_SCRESCTRL1_PER_MSP2 (1 << 24) -#define SCTL_SCRESCTRL1_PER_MSP3 (1 << 25) -#define SCTL_SCRESCTRL1_PER_MSP4 (1 << 26) -#define SCTL_SCRESCTRL1_PER_MSP5 (1 << 27) -#define SCTL_SCRESCTRL1_PER_MMC (1 << 28) -#define SCTL_SCRESCTRL1_KER_MSP0 (1 << 29) -#define SCTL_SCRESCTRL1_KER_MSP1 (1 << 30) -#define SCTL_SCRESCTRL1_KER_MSP2 (1 << 31) - -#define SCTL_SCPEREN0_UART0 (1 << 0) -#define SCTL_SCPEREN0_UART1 (1 << 1) -#define SCTL_SCPEREN0_UART2 (1 << 2) -#define SCTL_SCPEREN0_UART3 (1 << 3) -#define SCTL_SCPEREN0_MSP0 (1 << 4) -#define SCTL_SCPEREN0_MSP1 (1 << 5) -#define SCTL_SCPEREN0_MSP2 (1 << 6) -#define SCTL_SCPEREN0_MSP3 (1 << 7) -#define SCTL_SCPEREN0_MSP4 (1 << 8) -#define SCTL_SCPEREN0_MSP5 (1 << 9) -#define SCTL_SCPEREN0_SPI0 (1 << 10) -#define SCTL_SCPEREN0_SPI1 (1 << 11) -#define SCTL_SCPEREN0_SPI2 (1 << 12) -#define SCTL_SCPEREN0_I2C0 (1 << 13) -#define SCTL_SCPEREN0_I2C1 (1 << 14) -#define SCTL_SCPEREN0_I2C2 (1 << 15) -#define SCTL_SCPEREN0_I2C3 (1 << 16) -#define SCTL_SCPEREN0_SVDO_LVDS (1 << 17) -#define SCTL_SCPEREN0_USB_HOST (1 << 18) -#define SCTL_SCPEREN0_USB_OTG (1 << 19) -#define SCTL_SCPEREN0_MCI0 (1 << 20) -#define SCTL_SCPEREN0_MCI1 (1 << 21) -#define SCTL_SCPEREN0_MCI2 (1 << 22) -#define SCTL_SCPEREN0_MCI3 (1 << 23) -#define SCTL_SCPEREN0_SATA (1 << 24) -#define SCTL_SCPEREN0_ETHERNET (1 << 25) -#define SCTL_SCPEREN0_VIC (1 << 26) -#define SCTL_SCPEREN0_DMA_AUDIO (1 << 27) -#define SCTL_SCPEREN0_DMA_SOC (1 << 28) -#define SCTL_SCPEREN0_RAM (1 << 29) -#define SCTL_SCPEREN0_VIP (1 << 30) -#define SCTL_SCPEREN0_ARM (1 << 31) - -#define SCTL_SCPEREN1_UART0 (1 << 0) -#define SCTL_SCPEREN1_UART1 (1 << 1) -#define SCTL_SCPEREN1_UART2 (1 << 2) -#define SCTL_SCPEREN1_UART3 (1 << 3) -#define SCTL_SCPEREN1_MSP0 (1 << 4) -#define SCTL_SCPEREN1_MSP1 (1 << 5) -#define SCTL_SCPEREN1_MSP2 (1 << 6) -#define SCTL_SCPEREN1_MSP3 (1 << 7) -#define SCTL_SCPEREN1_MSP4 (1 << 8) -#define SCTL_SCPEREN1_MSP5 (1 << 9) -#define SCTL_SCPEREN1_SPI0 (1 << 10) -#define SCTL_SCPEREN1_SPI1 (1 << 11) -#define SCTL_SCPEREN1_SPI2 (1 << 12) -#define SCTL_SCPEREN1_I2C0 (1 << 13) -#define SCTL_SCPEREN1_I2C1 (1 << 14) -#define SCTL_SCPEREN1_I2C2 (1 << 15) -#define SCTL_SCPEREN1_I2C3 (1 << 16) -#define SCTL_SCPEREN1_USB_PHY (1 << 17) - -/* - * APB-SOC registers - */ -static inline -u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - return __sta2x11_mfd_mask(pdev, reg, 
mask, val, sta2x11_apb_soc_regs); -} - -#define PCIE_EP1_FUNC3_0_INTR_REG 0x000 -#define PCIE_EP1_FUNC7_4_INTR_REG 0x004 -#define PCIE_EP2_FUNC3_0_INTR_REG 0x008 -#define PCIE_EP2_FUNC7_4_INTR_REG 0x00c -#define PCIE_EP3_FUNC3_0_INTR_REG 0x010 -#define PCIE_EP3_FUNC7_4_INTR_REG 0x014 -#define PCIE_EP4_FUNC3_0_INTR_REG 0x018 -#define PCIE_EP4_FUNC7_4_INTR_REG 0x01c -#define PCIE_INTR_ENABLE0_REG 0x020 -#define PCIE_INTR_ENABLE1_REG 0x024 -#define PCIE_EP1_FUNC_TC_REG 0x028 -#define PCIE_EP2_FUNC_TC_REG 0x02c -#define PCIE_EP3_FUNC_TC_REG 0x030 -#define PCIE_EP4_FUNC_TC_REG 0x034 -#define PCIE_EP1_FUNC_F_REG 0x038 -#define PCIE_EP2_FUNC_F_REG 0x03c -#define PCIE_EP3_FUNC_F_REG 0x040 -#define PCIE_EP4_FUNC_F_REG 0x044 -#define PCIE_PAB_AMBA_SW_RST_REG 0x048 -#define PCIE_PM_STATUS_0_PORT_0_4 0x04c -#define PCIE_PM_STATUS_7_0_EP1 0x050 -#define PCIE_PM_STATUS_7_0_EP2 0x054 -#define PCIE_PM_STATUS_7_0_EP3 0x058 -#define PCIE_PM_STATUS_7_0_EP4 0x05c -#define PCIE_DEV_ID_0_EP1_REG 0x060 -#define PCIE_CC_REV_ID_0_EP1_REG 0x064 -#define PCIE_DEV_ID_1_EP1_REG 0x068 -#define PCIE_CC_REV_ID_1_EP1_REG 0x06c -#define PCIE_DEV_ID_2_EP1_REG 0x070 -#define PCIE_CC_REV_ID_2_EP1_REG 0x074 -#define PCIE_DEV_ID_3_EP1_REG 0x078 -#define PCIE_CC_REV_ID_3_EP1_REG 0x07c -#define PCIE_DEV_ID_4_EP1_REG 0x080 -#define PCIE_CC_REV_ID_4_EP1_REG 0x084 -#define PCIE_DEV_ID_5_EP1_REG 0x088 -#define PCIE_CC_REV_ID_5_EP1_REG 0x08c -#define PCIE_DEV_ID_6_EP1_REG 0x090 -#define PCIE_CC_REV_ID_6_EP1_REG 0x094 -#define PCIE_DEV_ID_7_EP1_REG 0x098 -#define PCIE_CC_REV_ID_7_EP1_REG 0x09c -#define PCIE_DEV_ID_0_EP2_REG 0x0a0 -#define PCIE_CC_REV_ID_0_EP2_REG 0x0a4 -#define PCIE_DEV_ID_1_EP2_REG 0x0a8 -#define PCIE_CC_REV_ID_1_EP2_REG 0x0ac -#define PCIE_DEV_ID_2_EP2_REG 0x0b0 -#define PCIE_CC_REV_ID_2_EP2_REG 0x0b4 -#define PCIE_DEV_ID_3_EP2_REG 0x0b8 -#define PCIE_CC_REV_ID_3_EP2_REG 0x0bc -#define PCIE_DEV_ID_4_EP2_REG 0x0c0 -#define PCIE_CC_REV_ID_4_EP2_REG 0x0c4 -#define PCIE_DEV_ID_5_EP2_REG 0x0c8 -#define PCIE_CC_REV_ID_5_EP2_REG 0x0cc -#define PCIE_DEV_ID_6_EP2_REG 0x0d0 -#define PCIE_CC_REV_ID_6_EP2_REG 0x0d4 -#define PCIE_DEV_ID_7_EP2_REG 0x0d8 -#define PCIE_CC_REV_ID_7_EP2_REG 0x0dC -#define PCIE_DEV_ID_0_EP3_REG 0x0e0 -#define PCIE_CC_REV_ID_0_EP3_REG 0x0e4 -#define PCIE_DEV_ID_1_EP3_REG 0x0e8 -#define PCIE_CC_REV_ID_1_EP3_REG 0x0ec -#define PCIE_DEV_ID_2_EP3_REG 0x0f0 -#define PCIE_CC_REV_ID_2_EP3_REG 0x0f4 -#define PCIE_DEV_ID_3_EP3_REG 0x0f8 -#define PCIE_CC_REV_ID_3_EP3_REG 0x0fc -#define PCIE_DEV_ID_4_EP3_REG 0x100 -#define PCIE_CC_REV_ID_4_EP3_REG 0x104 -#define PCIE_DEV_ID_5_EP3_REG 0x108 -#define PCIE_CC_REV_ID_5_EP3_REG 0x10c -#define PCIE_DEV_ID_6_EP3_REG 0x110 -#define PCIE_CC_REV_ID_6_EP3_REG 0x114 -#define PCIE_DEV_ID_7_EP3_REG 0x118 -#define PCIE_CC_REV_ID_7_EP3_REG 0x11c -#define PCIE_DEV_ID_0_EP4_REG 0x120 -#define PCIE_CC_REV_ID_0_EP4_REG 0x124 -#define PCIE_DEV_ID_1_EP4_REG 0x128 -#define PCIE_CC_REV_ID_1_EP4_REG 0x12c -#define PCIE_DEV_ID_2_EP4_REG 0x130 -#define PCIE_CC_REV_ID_2_EP4_REG 0x134 -#define PCIE_DEV_ID_3_EP4_REG 0x138 -#define PCIE_CC_REV_ID_3_EP4_REG 0x13c -#define PCIE_DEV_ID_4_EP4_REG 0x140 -#define PCIE_CC_REV_ID_4_EP4_REG 0x144 -#define PCIE_DEV_ID_5_EP4_REG 0x148 -#define PCIE_CC_REV_ID_5_EP4_REG 0x14c -#define PCIE_DEV_ID_6_EP4_REG 0x150 -#define PCIE_CC_REV_ID_6_EP4_REG 0x154 -#define PCIE_DEV_ID_7_EP4_REG 0x158 -#define PCIE_CC_REV_ID_7_EP4_REG 0x15c -#define PCIE_SUBSYS_VEN_ID_REG 0x160 -#define PCIE_COMMON_CLOCK_CONFIG_0_4_0 0x164 -#define PCIE_MIPHYP_SSC_EN_REG 0x168 -#define 
PCIE_MIPHYP_ADDR_REG 0x16c -#define PCIE_L1_ASPM_READY_REG 0x170 -#define PCIE_EXT_CFG_RDY_REG 0x174 -#define PCIE_SoC_INT_ROUTER_STATUS0_REG 0x178 -#define PCIE_SoC_INT_ROUTER_STATUS1_REG 0x17c -#define PCIE_SoC_INT_ROUTER_STATUS2_REG 0x180 -#define PCIE_SoC_INT_ROUTER_STATUS3_REG 0x184 -#define DMA_IP_CTRL_REG 0x324 -#define DISP_BRIDGE_PU_PD_CTRL_REG 0x328 -#define VIP_PU_PD_CTRL_REG 0x32c -#define USB_MLB_PU_PD_CTRL_REG 0x330 -#define SDIO_PU_PD_MISCFUNC_CTRL_REG1 0x334 -#define SDIO_PU_PD_MISCFUNC_CTRL_REG2 0x338 -#define UART_PU_PD_CTRL_REG 0x33c -#define ARM_Lock 0x340 -#define SYS_IO_CHAR_REG1 0x344 -#define SYS_IO_CHAR_REG2 0x348 -#define SATA_CORE_ID_REG 0x34c -#define SATA_CTRL_REG 0x350 -#define I2C_HSFIX_MISC_REG 0x354 -#define SPARE2_RESERVED 0x358 -#define SPARE3_RESERVED 0x35c -#define MASTER_LOCK_REG 0x368 -#define SYSTEM_CONFIG_STATUS_REG 0x36c -#define MSP_CLK_CTRL_REG 0x39c -#define COMPENSATION_REG1 0x3c4 -#define COMPENSATION_REG2 0x3c8 -#define COMPENSATION_REG3 0x3cc -#define TEST_CTL_REG 0x3d0 - -/* - * SECR (OTP) registers - */ -#define STA2X11_SECR_CR 0x00 -#define STA2X11_SECR_FVR0 0x10 -#define STA2X11_SECR_FVR1 0x14 - -extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev, - enum sta2x11_mfd_plat_dev index, - void __iomem **regs, - spinlock_t **lock); - -#endif /* __STA2X11_MFD_H */ diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index f09ba598c97a..23b0cae4a9f8 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -33,6 +33,9 @@ #define TIM_DCR 0x48 /* DMA control register */ #define TIM_DMAR 0x4C /* DMA register for transfer */ #define TIM_TISEL 0x68 /* Input Selection */ +#define TIM_HWCFGR2 0x3EC /* hardware configuration 2 Reg (MP25) */ +#define TIM_HWCFGR1 0x3F0 /* hardware configuration 1 Reg (MP25) */ +#define TIM_IPIDR 0x3F8 /* IP identification Reg (MP25) */ #define TIM_CR1_CEN BIT(0) /* Counter Enable */ #define TIM_CR1_DIR BIT(4) /* Counter Direction */ @@ -100,6 +103,9 @@ #define TIM_BDTR_BKF(x) (0xf << (16 + (x) * 4)) #define TIM_DCR_DBA GENMASK(4, 0) /* DMA base addr */ #define TIM_DCR_DBL GENMASK(12, 8) /* DMA burst len */ +#define TIM_HWCFGR1_NB_OF_CC GENMASK(3, 0) /* Capture/compare channels */ +#define TIM_HWCFGR1_NB_OF_DT GENMASK(7, 4) /* Complementary outputs & dead-time generators */ +#define TIM_HWCFGR2_CNT_WIDTH GENMASK(15, 8) /* Counter width */ #define MAX_TIM_PSC 0xFFFF #define MAX_TIM_ICPSC 0x3 @@ -113,6 +119,8 @@ #define TIM_BDTR_BKF_MASK 0xF #define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4) +#define STM32MP25_TIM_IPIDR 0x00120002 + enum stm32_timers_dmas { STM32_TIMERS_DMA_CH1, STM32_TIMERS_DMA_CH2, @@ -151,6 +159,7 @@ struct stm32_timers_dma { struct stm32_timers { struct clk *clk; + u32 ipidr; struct regmap *regmap; u32 max_arr; struct stm32_timers_dma dma; /* Only to be used by the parent */ diff --git a/include/linux/mfd/tps65219.h b/include/linux/mfd/tps65219.h index e6826e34e2a6..3e8d29189267 100644 --- a/include/linux/mfd/tps65219.h +++ b/include/linux/mfd/tps65219.h @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Functions to access TPS65219 Power Management IC. 
+ * Functions to access TPS65215/TPS65219 Power Management Integrated Chips * * Copyright (C) 2022 BayLibre Incorporated - https://www.baylibre.com/ + * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef MFD_TPS65219_H @@ -10,16 +11,15 @@ #include <linux/bitops.h> #include <linux/notifier.h> +#include <linux/regmap.h> #include <linux/regulator/driver.h> -struct regmap; -struct regmap_irq_chip_data; - -#define TPS65219_1V35 1350000 -#define TPS65219_1V8 1800000 - -/* TPS chip id list */ -#define TPS65219 0xF0 +/* Chip id list*/ +enum pmic_id { + TPS65214, + TPS65215, + TPS65219, +}; /* I2C ID for TPS65219 part */ #define TPS65219_I2C_ID 0x24 @@ -29,15 +29,23 @@ struct regmap_irq_chip_data; #define TPS65219_REG_NVM_ID 0x01 #define TPS65219_REG_ENABLE_CTRL 0x02 #define TPS65219_REG_BUCKS_CONFIG 0x03 +#define TPS65214_REG_LOCK 0x03 #define TPS65219_REG_LDO4_VOUT 0x04 +#define TPS65214_REG_LDO1_VOUT_STBY 0x04 #define TPS65219_REG_LDO3_VOUT 0x05 +#define TPS65215_REG_LDO2_VOUT 0x05 +#define TPS65214_REG_LDO1_VOUT 0x05 #define TPS65219_REG_LDO2_VOUT 0x06 +#define TPS65214_REG_LDO2_VOUT 0x06 #define TPS65219_REG_LDO1_VOUT 0x07 +#define TPS65214_REG_LDO2_VOUT_STBY 0x07 #define TPS65219_REG_BUCK3_VOUT 0x8 #define TPS65219_REG_BUCK2_VOUT 0x9 #define TPS65219_REG_BUCK1_VOUT 0xA #define TPS65219_REG_LDO4_SEQUENCE_SLOT 0xB #define TPS65219_REG_LDO3_SEQUENCE_SLOT 0xC +#define TPS65215_REG_LDO2_SEQUENCE_SLOT 0xC +#define TPS65214_REG_LDO1_SEQUENCE_SLOT 0xC #define TPS65219_REG_LDO2_SEQUENCE_SLOT 0xD #define TPS65219_REG_LDO1_SEQUENCE_SLOT 0xE #define TPS65219_REG_BUCK3_SEQUENCE_SLOT 0xF @@ -46,15 +54,21 @@ struct regmap_irq_chip_data; #define TPS65219_REG_nRST_SEQUENCE_SLOT 0x12 #define TPS65219_REG_GPIO_SEQUENCE_SLOT 0x13 #define TPS65219_REG_GPO2_SEQUENCE_SLOT 0x14 +#define TPS65214_REG_GPIO_GPI_SEQUENCE_SLOT 0x14 #define TPS65219_REG_GPO1_SEQUENCE_SLOT 0x15 +#define TPS65214_REG_GPO_SEQUENCE_SLOT 0x15 #define TPS65219_REG_POWER_UP_SLOT_DURATION_1 0x16 #define TPS65219_REG_POWER_UP_SLOT_DURATION_2 0x17 +/* _SLOT_DURATION_3 doesn't apply to TPS65215*/ #define TPS65219_REG_POWER_UP_SLOT_DURATION_3 0x18 #define TPS65219_REG_POWER_UP_SLOT_DURATION_4 0x19 +#define TPS65214_REG_BUCK3_VOUT_STBY 0x19 #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_1 0x1A #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_2 0x1B #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_3 0x1C +#define TPS65214_REG_BUCK2_VOUT_STBY 0x1C #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_4 0x1D +#define TPS65214_REG_BUCK1_VOUT_STBY 0x1D #define TPS65219_REG_GENERAL_CONFIG 0x1E #define TPS65219_REG_MFP_1_CONFIG 0x1F #define TPS65219_REG_MFP_2_CONFIG 0x20 @@ -72,9 +86,19 @@ struct regmap_irq_chip_data; #define TPS65219_REG_DISCHARGE_CONFIG 0x2A /* main irq registers */ #define TPS65219_REG_INT_SOURCE 0x2B -/* 'sub irq' registers */ + +/* TPS65219 'sub irq' registers */ #define TPS65219_REG_INT_LDO_3_4 0x2C #define TPS65219_REG_INT_LDO_1_2 0x2D + +/* TPS65215 specific 'sub irq' registers */ +#define TPS65215_REG_INT_LDO_2 0x2C +#define TPS65215_REG_INT_LDO_1 0x2D + +/* TPS65214 specific 'sub irq' register */ +#define TPS65214_REG_INT_LDO_1_2 0x2D + +/* Common TPS65215 & TPS65219 'sub irq' registers */ #define TPS65219_REG_INT_BUCK_3 0x2E #define TPS65219_REG_INT_BUCK_1_2 0x2F #define TPS65219_REG_INT_SYSTEM 0x30 @@ -91,6 +115,17 @@ struct regmap_irq_chip_data; #define TPS65219_REG_INT_TO_RV_POS 6 #define TPS65219_REG_INT_PB_POS 7 +#define TPS65215_REG_INT_LDO_2_POS 0 +#define TPS65215_REG_INT_LDO_1_POS 1 + +#define 
TPS65214_REG_INT_LDO_1_2_POS 0 +#define TPS65214_REG_INT_BUCK_3_POS 1 +#define TPS65214_REG_INT_BUCK_1_2_POS 2 +#define TPS65214_REG_INT_SYS_POS 3 +#define TPS65214_REG_INT_RV_POS 4 +#define TPS65214_REG_INT_TO_RV_POS 5 +#define TPS65214_REG_INT_PB_POS 6 + #define TPS65219_REG_USER_NVM_CMD 0x34 #define TPS65219_REG_POWER_UP_STATUS 0x35 #define TPS65219_REG_SPARE_2 0x36 @@ -112,6 +147,8 @@ struct regmap_irq_chip_data; #define TPS65219_ENABLE_LDO1_EN_MASK BIT(3) #define TPS65219_ENABLE_LDO2_EN_MASK BIT(4) #define TPS65219_ENABLE_LDO3_EN_MASK BIT(5) +#define TPS65215_ENABLE_LDO2_EN_MASK BIT(5) +#define TPS65214_ENABLE_LDO1_EN_MASK BIT(5) #define TPS65219_ENABLE_LDO4_EN_MASK BIT(6) /* power ON-OFF sequence slot */ #define TPS65219_BUCKS_LDOS_SEQUENCE_OFF_SLOT_MASK GENMASK(3, 0) @@ -163,20 +200,27 @@ struct regmap_irq_chip_data; #define TPS65219_REG_MASK_EFFECT_MASK GENMASK(2, 1) #define TPS65219_REG_MASK_INT_FOR_PB_MASK BIT(7) /* UnderVoltage - Short to GND - OverCurrent*/ -/* LDO3-4 */ +/* LDO3-4: only for TPS65219*/ #define TPS65219_INT_LDO3_SCG_MASK BIT(0) #define TPS65219_INT_LDO3_OC_MASK BIT(1) #define TPS65219_INT_LDO3_UV_MASK BIT(2) #define TPS65219_INT_LDO4_SCG_MASK BIT(3) #define TPS65219_INT_LDO4_OC_MASK BIT(4) #define TPS65219_INT_LDO4_UV_MASK BIT(5) -/* LDO1-2 */ +/* LDO1-2: TPS65214 & TPS65219 */ #define TPS65219_INT_LDO1_SCG_MASK BIT(0) #define TPS65219_INT_LDO1_OC_MASK BIT(1) #define TPS65219_INT_LDO1_UV_MASK BIT(2) #define TPS65219_INT_LDO2_SCG_MASK BIT(3) #define TPS65219_INT_LDO2_OC_MASK BIT(4) #define TPS65219_INT_LDO2_UV_MASK BIT(5) +/* TPS65215 LDO1-2*/ +#define TPS65215_INT_LDO1_SCG_MASK BIT(0) +#define TPS65215_INT_LDO1_OC_MASK BIT(1) +#define TPS65215_INT_LDO1_UV_MASK BIT(2) +#define TPS65215_INT_LDO2_SCG_MASK BIT(0) +#define TPS65215_INT_LDO2_OC_MASK BIT(1) +#define TPS65215_INT_LDO2_UV_MASK BIT(2) /* BUCK3 */ #define TPS65219_INT_BUCK3_SCG_MASK BIT(0) #define TPS65219_INT_BUCK3_OC_MASK BIT(1) @@ -191,12 +235,13 @@ struct regmap_irq_chip_data; #define TPS65219_INT_BUCK2_OC_MASK BIT(5) #define TPS65219_INT_BUCK2_NEG_OC_MASK BIT(6) #define TPS65219_INT_BUCK2_UV_MASK BIT(7) -/* Thermal Sensor */ +/* Thermal Sensor: TPS65219/TPS65215 */ #define TPS65219_INT_SENSOR_3_WARM_MASK BIT(0) +#define TPS65219_INT_SENSOR_3_HOT_MASK BIT(4) +/* Thermal Sensor: TPS65219/TPS65215/TPS65214 */ #define TPS65219_INT_SENSOR_2_WARM_MASK BIT(1) #define TPS65219_INT_SENSOR_1_WARM_MASK BIT(2) #define TPS65219_INT_SENSOR_0_WARM_MASK BIT(3) -#define TPS65219_INT_SENSOR_3_HOT_MASK BIT(4) #define TPS65219_INT_SENSOR_2_HOT_MASK BIT(5) #define TPS65219_INT_SENSOR_1_HOT_MASK BIT(6) #define TPS65219_INT_SENSOR_0_HOT_MASK BIT(7) @@ -207,6 +252,8 @@ struct regmap_irq_chip_data; #define TPS65219_INT_LDO1_RV_MASK BIT(3) #define TPS65219_INT_LDO2_RV_MASK BIT(4) #define TPS65219_INT_LDO3_RV_MASK BIT(5) +#define TPS65215_INT_LDO2_RV_MASK BIT(5) +#define TPS65214_INT_LDO2_RV_MASK BIT(5) #define TPS65219_INT_LDO4_RV_MASK BIT(6) /* Residual Voltage ShutDown */ #define TPS65219_INT_BUCK1_RV_SD_MASK BIT(0) @@ -215,6 +262,8 @@ struct regmap_irq_chip_data; #define TPS65219_INT_LDO1_RV_SD_MASK BIT(3) #define TPS65219_INT_LDO2_RV_SD_MASK BIT(4) #define TPS65219_INT_LDO3_RV_SD_MASK BIT(5) +#define TPS65215_INT_LDO2_RV_SD_MASK BIT(5) +#define TPS65214_INT_LDO1_RV_SD_MASK BIT(5) #define TPS65219_INT_LDO4_RV_SD_MASK BIT(6) #define TPS65219_INT_TIMEOUT_MASK BIT(7) /* Power Button */ @@ -240,7 +289,15 @@ enum { TPS65219_INT_LDO4_SCG, TPS65219_INT_LDO4_OC, TPS65219_INT_LDO4_UV, - /* LDO1-2 */ + /* TPS65215 LDO1*/ + 
TPS65215_INT_LDO1_SCG, + TPS65215_INT_LDO1_OC, + TPS65215_INT_LDO1_UV, + /* TPS65215 LDO2*/ + TPS65215_INT_LDO2_SCG, + TPS65215_INT_LDO2_OC, + TPS65215_INT_LDO2_UV, + /* LDO1-2: TPS65219/TPS65214 */ TPS65219_INT_LDO1_SCG, TPS65219_INT_LDO1_OC, TPS65219_INT_LDO1_UV, @@ -276,6 +333,8 @@ enum { TPS65219_INT_BUCK3_RV, TPS65219_INT_LDO1_RV, TPS65219_INT_LDO2_RV, + TPS65215_INT_LDO2_RV, + TPS65214_INT_LDO2_RV, TPS65219_INT_LDO3_RV, TPS65219_INT_LDO4_RV, /* Residual Voltage ShutDown */ @@ -283,6 +342,8 @@ enum { TPS65219_INT_BUCK2_RV_SD, TPS65219_INT_BUCK3_RV_SD, TPS65219_INT_LDO1_RV_SD, + TPS65214_INT_LDO1_RV_SD, + TPS65215_INT_LDO2_RV_SD, TPS65219_INT_LDO2_RV_SD, TPS65219_INT_LDO3_RV_SD, TPS65219_INT_LDO4_RV_SD, @@ -292,6 +353,23 @@ enum { TPS65219_INT_PB_RISING_EDGE_DETECT, }; +enum tps65214_regulator_id { + /* + * DCDC's same as TPS65219 + * LDO1 maps to TPS65219's LDO3 + * LDO2 is the same as TPS65219 + * + */ + TPS65214_LDO_1 = 3, + TPS65214_LDO_2 = 4, +}; + +enum tps65215_regulator_id { + /* DCDC's same as TPS65219 */ + /* LDO1 is the same as TPS65219 */ + TPS65215_LDO_2 = 4, +}; + enum tps65219_regulator_id { /* DCDC's */ TPS65219_BUCK_1, @@ -305,11 +383,40 @@ enum tps65219_regulator_id { }; /* Number of step-down converters available */ -#define TPS65219_NUM_DCDC 3 +#define TPS6521X_NUM_BUCKS 3 /* Number of LDO voltage regulators available */ #define TPS65219_NUM_LDO 4 +#define TPS65215_NUM_LDO 2 +#define TPS65214_NUM_LDO 2 /* Number of total regulators available */ -#define TPS65219_NUM_REGULATOR (TPS65219_NUM_DCDC + TPS65219_NUM_LDO) +#define TPS65219_NUM_REGULATOR (TPS6521X_NUM_BUCKS + TPS65219_NUM_LDO) +#define TPS65215_NUM_REGULATOR (TPS6521X_NUM_BUCKS + TPS65215_NUM_LDO) +#define TPS65214_NUM_REGULATOR (TPS6521X_NUM_BUCKS + TPS65214_NUM_LDO) + +/* Define the TPS65214 IRQ numbers */ +enum tps65214_irqs { + /* INT source registers */ + TPS65214_TO_RV_SD_SET_IRQ, + TPS65214_RV_SET_IRQ, + TPS65214_SYS_SET_IRQ, + TPS65214_BUCK_1_2_SET_IRQ, + TPS65214_BUCK_3_SET_IRQ, + TPS65214_LDO_1_2_SET_IRQ, + TPS65214_PB_SET_IRQ = 7, +}; + +/* Define the TPS65215 IRQ numbers */ +enum tps65215_irqs { + /* INT source registers */ + TPS65215_TO_RV_SD_SET_IRQ, + TPS65215_RV_SET_IRQ, + TPS65215_SYS_SET_IRQ, + TPS65215_BUCK_1_2_SET_IRQ, + TPS65215_BUCK_3_SET_IRQ, + TPS65215_LDO_1_SET_IRQ, + TPS65215_LDO_2_SET_IRQ, + TPS65215_PB_SET_IRQ, +}; /* Define the TPS65219 IRQ numbers */ enum tps65219_irqs { @@ -331,6 +438,7 @@ enum tps65219_irqs { * * @dev: MFD device * @regmap: Regmap for accessing the device registers + * @chip_id: Chip ID * @irq_data: Regmap irq data used for the irq chip * @nb: notifier block for the restart handler */ @@ -338,6 +446,7 @@ struct tps65219 { struct device *dev; struct regmap *regmap; + unsigned int chip_id; struct regmap_irq_chip_data *irq_data; struct notifier_block nb; }; diff --git a/include/linux/mfd/upboard-fpga.h b/include/linux/mfd/upboard-fpga.h new file mode 100644 index 000000000000..12231e40f5da --- /dev/null +++ b/include/linux/mfd/upboard-fpga.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * UP Board CPLD/FPGA driver + * + * Copyright (c) AAEON. All rights reserved. 
+ * Copyright (C) 2024 Bootlin + * + * Author: Gary Wang <garywang@aaeon.com.tw> + * Author: Thomas Richard <thomas.richard@bootlin.com> + * + */ + +#ifndef __LINUX_MFD_UPBOARD_FPGA_H +#define __LINUX_MFD_UPBOARD_FPGA_H + +#define UPBOARD_REGISTER_SIZE 16 + +enum upboard_fpgareg { + UPBOARD_REG_PLATFORM_ID = 0x10, + UPBOARD_REG_FIRMWARE_ID = 0x11, + UPBOARD_REG_FUNC_EN0 = 0x20, + UPBOARD_REG_FUNC_EN1 = 0x21, + UPBOARD_REG_GPIO_EN0 = 0x30, + UPBOARD_REG_GPIO_EN1 = 0x31, + UPBOARD_REG_GPIO_EN2 = 0x32, + UPBOARD_REG_GPIO_DIR0 = 0x40, + UPBOARD_REG_GPIO_DIR1 = 0x41, + UPBOARD_REG_GPIO_DIR2 = 0x42, + UPBOARD_REG_MAX, +}; + +enum upboard_fpga_type { + UPBOARD_UP_FPGA, + UPBOARD_UP2_FPGA, +}; + +struct upboard_fpga_data { + enum upboard_fpga_type type; + const struct regmap_config *regmap_config; +}; + +struct upboard_fpga { + struct device *dev; + struct regmap *regmap; + struct gpio_desc *enable_gpio; + struct gpio_desc *reset_gpio; + struct gpio_desc *clear_gpio; + struct gpio_desc *strobe_gpio; + struct gpio_desc *datain_gpio; + struct gpio_desc *dataout_gpio; + unsigned int firmware_version; + const struct upboard_fpga_data *fpga_data; +}; + +#endif /* __LINUX_MFD_UPBOARD_FPGA_H */ diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 059dc94d20bb..dd372b0123a6 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -721,12 +721,6 @@ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl); void mhi_soc_reset(struct mhi_controller *mhi_cntrl); /** - * mhi_device_get - Disable device low power mode - * @mhi_dev: Device associated with the channel - */ -void mhi_device_get(struct mhi_device *mhi_dev); - -/** * mhi_device_get_sync - Disable device low power mode. Synchronously * take the controller out of suspended state * @mhi_dev: Device associated with the channel @@ -777,18 +771,6 @@ int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev); void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev); /** - * mhi_queue_dma - Send or receive DMA mapped buffers from client device - * over MHI channel - * @mhi_dev: Device associated with the channels - * @dir: DMA direction for the channel - * @mhi_buf: Buffer for holding the DMA mapped data - * @len: Buffer length - * @mflags: MHI transfer flags used for the transfer - */ -int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir, - struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags); - -/** * mhi_queue_buf - Send or receive raw buffers from client device over MHI * channel * @mhi_dev: Device associated with the channels diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc..9af01bdd86d2 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 002e49b2ebd9..aaa2114498d6 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -144,16 +144,14 @@ const struct movable_operations *page_movable_ops(struct page *page) #ifdef CONFIG_NUMA_BALANCING int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node); -int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, - int node); +int migrate_misplaced_folio(struct folio *folio, int node); #else static inline int 
migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ } -static inline int migrate_misplaced_folio(struct folio *folio, - struct vm_area_struct *vma, int node) +static inline int migrate_misplaced_folio(struct folio *folio, int node) { return -EAGAIN; /* can't migrate now */ } @@ -207,8 +205,8 @@ struct migrate_vma { unsigned long end; /* - * Set to the owner value also stored in page->pgmap->owner for - * migrating out of device private memory. The flags also need to + * Set to the owner value also stored in page_pgmap(page)->owner + * for migrating out of device private memory. The flags also need to * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE. * The caller should always set this field when using mmu notifier * callbacks to avoid device MMU invalidations for device private @@ -229,6 +227,7 @@ void migrate_vma_pages(struct migrate_vma *migrate); void migrate_vma_finalize(struct migrate_vma *migrate); int migrate_device_range(unsigned long *src_pfns, unsigned long start, unsigned long npages); +int migrate_device_pfns(unsigned long *src_pfns, unsigned long npages); void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns, unsigned long npages); void migrate_device_finalize(unsigned long *src_pfns, diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index e781727c8916..79ddc0adbf2b 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -6,6 +6,17 @@ #include <linux/string.h> #include <linux/types.h> +/* + * The Min Heap API provides utilities for managing min-heaps, a binary tree + * structure where each node's value is less than or equal to its children's + * values, ensuring the smallest element is at the root. + * + * Users should avoid directly calling functions prefixed with __min_heap_*(). + * Instead, use the provided macro wrappers. + * + * For further details and examples, refer to Documentation/core-api/min_heap.rst. + */ + /** * Data structure to hold a min-heap. * @nr: Number of elements currently in the heap. @@ -15,8 +26,8 @@ */ #define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ struct _name { \ - int nr; \ - int size; \ + size_t nr; \ + size_t size; \ _type *data; \ _type preallocated[_nr]; \ } @@ -207,7 +218,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) /* Initialize a min-heap. */ static __always_inline -void __min_heap_init_inline(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, size_t size) { heap->nr = 0; heap->size = size; @@ -218,7 +229,7 @@ void __min_heap_init_inline(min_heap_char *heap, void *data, int size) } #define min_heap_init_inline(_heap, _data, _size) \ - __min_heap_init_inline((min_heap_char *)_heap, _data, _size) + __min_heap_init_inline(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size) /* Get the minimum element from the heap. */ static __always_inline @@ -228,7 +239,8 @@ void *__min_heap_peek_inline(struct min_heap_char *heap) } #define min_heap_peek_inline(_heap) \ - (__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap)) + (__minheap_cast(_heap) \ + __min_heap_peek_inline(container_of(&(_heap)->nr, min_heap_char, nr))) /* Check if the heap is full. 
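For illustration, a minimal usage sketch of the macro wrappers described in the new min-heap documentation block (a sketch only, not code from this patch; the struct min_heap_callbacks members .less and .swp, the NULL-data convention of falling back to the preallocated[] array, and the ARRAY_SIZE()/pr_info() helpers are assumed from the existing kernel API rather than shown in this hunk):

#include <linux/array_size.h>
#include <linux/min_heap.h>
#include <linux/printk.h>

/* Heap of ints with storage for 8 elements embedded in the struct. */
MIN_HEAP_PREALLOCATED(int, int_min_heap, 8);

static bool int_less(const void *lhs, const void *rhs, void *args)
{
	return *(const int *)lhs < *(const int *)rhs;
}

static void int_swp(void *lhs, void *rhs, void *args)
{
	int tmp = *(int *)lhs;

	*(int *)lhs = *(int *)rhs;
	*(int *)rhs = tmp;
}

static void min_heap_demo(void)
{
	const struct min_heap_callbacks cbs = {
		.less = int_less,
		.swp  = int_swp,
	};
	struct int_min_heap heap;
	int vals[] = { 42, 7, 19 };
	size_t i;

	/* NULL data makes the heap use its preallocated[] storage. */
	min_heap_init(&heap, NULL, ARRAY_SIZE(heap.preallocated));

	for (i = 0; i < ARRAY_SIZE(vals); i++)
		min_heap_push(&heap, &vals[i], &cbs, NULL);

	pr_info("min = %d\n", *min_heap_peek(&heap));	/* prints 7 */

	min_heap_pop(&heap, &cbs, NULL);		/* drop the minimum */
}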
*/ static __always_inline @@ -238,11 +250,11 @@ bool __min_heap_full_inline(min_heap_char *heap) } #define min_heap_full_inline(_heap) \ - __min_heap_full_inline((min_heap_char *)_heap) + __min_heap_full_inline(container_of(&(_heap)->nr, min_heap_char, nr)) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { const unsigned long lsbit = elem_size & -elem_size; @@ -277,8 +289,8 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, } #define min_heap_sift_down_inline(_heap, _pos, _func, _args) \ - __min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline @@ -304,22 +316,23 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx } #define min_heap_sift_up_inline(_heap, _idx, _func, _args) \ - __min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ - _func, _args) + __min_heap_sift_up_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) /* Floyd's approach to heapification that is O(nr). */ static __always_inline void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func, void *args) { - int i; + ssize_t i; for (i = heap->nr / 2 - 1; i >= 0; i--) __min_heap_sift_down_inline(heap, i, elem_size, func, args); } #define min_heapify_all_inline(_heap, _func, _args) \ - __min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline @@ -340,7 +353,8 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, } #define min_heap_pop_inline(_heap, _func, _args) \ - __min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -356,8 +370,8 @@ void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t } #define min_heap_pop_push_inline(_heap, _element, _func, _args) \ - __min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) /* Push an element on to the heap, O(log2(nr)). 
*/ static __always_inline @@ -365,7 +379,7 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele const struct min_heap_callbacks *func, void *args) { void *data = heap->data; - int pos; + size_t pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) return false; @@ -382,8 +396,8 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele } #define min_heap_push_inline(_heap, _element, _func, _args) \ - __min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) /* Remove ith element from the heap, O(log2(nr)). */ static __always_inline @@ -411,13 +425,13 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, } #define min_heap_del_inline(_heap, _idx, _func, _args) \ - __min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ - _func, _args) + __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) -void __min_heap_init(min_heap_char *heap, void *data, int size); +void __min_heap_init(min_heap_char *heap, void *data, size_t size); void *__min_heap_peek(struct min_heap_char *heap); bool __min_heap_full(min_heap_char *heap); -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args); void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args); @@ -433,25 +447,31 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args); #define min_heap_init(_heap, _data, _size) \ - __min_heap_init((min_heap_char *)_heap, _data, _size) + __min_heap_init(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size) #define min_heap_peek(_heap) \ - (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) + (__minheap_cast(_heap) __min_heap_peek(container_of(&(_heap)->nr, min_heap_char, nr))) #define min_heap_full(_heap) \ - __min_heap_full((min_heap_char *)_heap) + __min_heap_full(container_of(&(_heap)->nr, min_heap_char, nr)) #define min_heap_sift_down(_heap, _pos, _func, _args) \ - __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) + __min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_sift_up(_heap, _idx, _func, _args) \ - __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + __min_heap_sift_up(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) #define min_heapify_all(_heap, _func, _args) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_pop(_heap, _func, _args) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_pop_push(_heap, _element, _func, _args) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ - _func, _args) + 
__min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_push(_heap, _element, _func, _args) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) + __min_heap_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ - __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) #endif /* _LINUX_MIN_HEAP_H */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 98008dd92153..eaaf5c008e4d 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -8,13 +8,10 @@ #include <linux/types.h> /* - * min()/max()/clamp() macros must accomplish three things: + * min()/max()/clamp() macros must accomplish several things: * * - Avoid multiple evaluations of the arguments (so side-effects like * "x++" happen only once) when non-constant. - * - Retain result as a constant expressions when called with only - * constant expressions (to avoid tripping VLA warnings in stack - * allocation usage). * - Perform signed v unsigned type-checking (to generate compile * errors instead of nasty runtime surprises). * - Unsigned char/short are always promoted to signed int and can be @@ -31,58 +28,54 @@ * bit #0 set if ok for unsigned comparisons * bit #1 set if ok for signed comparisons * - * In particular, statically non-negative signed integer - * expressions are ok for both. + * In particular, statically non-negative signed integer expressions + * are ok for both. * - * NOTE! Unsigned types smaller than 'int' are implicitly - * converted to 'int' in expressions, and are accepted for - * signed conversions for now. This is debatable. + * NOTE! Unsigned types smaller than 'int' are implicitly converted to 'int' + * in expressions, and are accepted for signed conversions for now. + * This is debatable. * - * Note that 'x' is the original expression, and 'ux' is - * the unique variable that contains the value. + * Note that 'x' is the original expression, and 'ux' is the unique variable + * that contains the value. * - * We use 'ux' for pure type checking, and 'x' for when - * we need to look at the value (but without evaluating - * it for side effects! Careful to only ever evaluate it - * with sizeof() or __builtin_constant_p() etc). + * We use 'ux' for pure type checking, and 'x' for when we need to look at the + * value (but without evaluating it for side effects! + * Careful to only ever evaluate it with sizeof() or __builtin_constant_p() etc). * - * Pointers end up being checked by the normal C type - * rules at the actual comparison, and these expressions - * only need to be careful to not cause warnings for - * pointer use. + * Pointers end up being checked by the normal C type rules at the actual + * comparison, and these expressions only need to be careful to not cause + * warnings for pointer use. */ -#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) -#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) -#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ - __signed_type_use(x,ux):__unsigned_type_use(x,ux)) +#define __sign_use(ux) (is_signed_type(typeof(ux)) ? 
\ + (2 + __is_nonneg(ux)) : (1 + 2 * (sizeof(ux) < 4))) /* - * To avoid warnings about casting pointers to integers - * of different sizes, we need that special sign type. + * Check whether a signed value is always non-negative. * - * On 64-bit we can just always use 'long', since any - * integer or pointer type can just be cast to that. + * A cast is needed to avoid any warnings from values that aren't signed + * integer types (in which case the result doesn't matter). * - * This does not work for 128-bit signed integers since - * the cast would truncate them, but we do not use s128 - * types in the kernel (we do use 'u128', but they will - * be handled by the !is_signed_type() case). + * On 64-bit any integer or pointer type can safely be cast to 'long long'. + * But on 32-bit we need to avoid warnings about casting pointers to integers + * of different sizes without truncating 64-bit values so 'long' or 'long long' + * must be used depending on the size of the value. * - * NOTE! The cast is there only to avoid any warnings - * from when values that aren't signed integer types. + * This does not work for 128-bit signed integers since the cast would truncate + * them, but we do not use s128 types in the kernel (we do use 'u128', + * but they are handled by the !is_signed_type() case). */ -#ifdef CONFIG_64BIT - #define __signed_type(ux) long +#if __SIZEOF_POINTER__ == __SIZEOF_LONG_LONG__ +#define __is_nonneg(ux) statically_true((long long)(ux) >= 0) #else - #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#define __is_nonneg(ux) statically_true( \ + (typeof(__builtin_choose_expr(sizeof(ux) > 4, 1LL, 1L)))(ux) >= 0) #endif -#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) -#define __types_ok(x,y,ux,uy) \ - (__sign_use(x,ux) & __sign_use(y,uy)) +#define __types_ok(ux, uy) \ + (__sign_use(ux) & __sign_use(uy)) -#define __types_ok3(x,y,z,ux,uy,uz) \ - (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) +#define __types_ok3(ux, uy, uz) \ + (__sign_use(ux) & __sign_use(uy) & __sign_use(uz)) #define __cmp_op_min < #define __cmp_op_max > @@ -97,30 +90,13 @@ #define __careful_cmp_once(op, x, y, ux, uy) ({ \ __auto_type ux = (x); __auto_type uy = (y); \ - BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + BUILD_BUG_ON_MSG(!__types_ok(ux, uy), \ #op"("#x", "#y") signedness error"); \ __cmp(op, ux, uy); }) #define __careful_cmp(op, x, y) \ __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) -#define __clamp(val, lo, hi) \ - ((val) >= (hi) ? (hi) : ((val) <= (lo) ? 
(lo) : (val))) - -#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ - __auto_type uval = (val); \ - __auto_type ulo = (lo); \ - __auto_type uhi = (hi); \ - static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ - (lo) <= (hi), true), \ - "clamp() low limit " #lo " greater than high limit " #hi); \ - BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ - "clamp("#val", "#lo", "#hi") signedness error"); \ - __clamp(uval, ulo, uhi); }) - -#define __careful_clamp(val, lo, hi) \ - __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) - /** * min - return minimum of two values of the same or compatible types * @x: first value @@ -154,7 +130,7 @@ #define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ - BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + BUILD_BUG_ON_MSG(!__types_ok3(ux, uy, uz), \ #op"3("#x", "#y", "#z") signedness error"); \ __cmp(op, ux, __cmp(op, uy, uz)); }) @@ -177,6 +153,22 @@ __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __cmp_once(min, type, x, y) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __cmp_once(max, type, x, y) + +/** * min_not_zero - return the minimum that is _not_ zero, unless both are zero * @x: value1 * @y: value2 @@ -186,39 +178,57 @@ typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) +#define __clamp(val, lo, hi) \ + ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) + +#define __clamp_once(type, val, lo, hi, uval, ulo, uhi) ({ \ + type uval = (val); \ + type ulo = (lo); \ + type uhi = (hi); \ + BUILD_BUG_ON_MSG(statically_true(ulo > uhi), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + BUILD_BUG_ON_MSG(!__types_ok3(uval, ulo, uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ + __clamp(uval, ulo, uhi); }) + +#define __careful_clamp(type, val, lo, hi) \ + __clamp_once(type, val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) + /** - * clamp - return a value clamped to a given range with strict typechecking + * clamp - return a value clamped to a given range with typechecking * @val: current value * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of @lo/@hi to make sure they are of the - * same type as @val. See the unnecessary pointer comparisons. - */ -#define clamp(val, lo, hi) __careful_clamp(val, lo, hi) - -/* - * ..and if you can't take the strict - * types, you can specify one yourself. - * - * Or not use min/max/clamp at all, of course. + * This macro checks @val/@lo/@hi to make sure they have compatible + * signedness. */ +#define clamp(val, lo, hi) __careful_clamp(__auto_type, val, lo, hi) /** - * min_t - return minimum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. 
*/ -#define min_t(type, x, y) __cmp_once(min, type, x, y) +#define clamp_t(type, val, lo, hi) __careful_clamp(type, val, lo, hi) /** - * max_t - return maximum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. */ -#define max_t(type, x, y) __cmp_once(max, type, x, y) +#define clamp_val(val, lo, hi) __careful_clamp(typeof(val), val, lo, hi) /* * Do not check the array parameter using __must_be_array(). @@ -263,31 +273,6 @@ */ #define max_array(array, len) __minmax_array(max, array, len) -/** - * clamp_t - return a value clamped to a given range using a given type - * @type: the type of variable to use - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of type - * @type to make all the comparisons. - */ -#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi)) - -/** - * clamp_val - return a value clamped to a given range using val's type - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of whatever - * type the input argument @val is. This is useful when @val is an unsigned - * type and @lo and @hi are literals that will otherwise be assigned a signed - * integer type. - */ -#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) - static inline bool in_range64(u64 val, u64 start, u64 len) { return (val - start) < len; @@ -326,9 +311,9 @@ static inline bool in_range32(u32 val, u32 start, u32 len) * Use these carefully: no type checking, and uses the arguments * multiple times. Use for obvious constants only. 
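To make the reworked signedness checking of clamp()/clamp_t()/clamp_val() concrete, a small sketch (an illustration only, not code from this patch; the function names are made up):

#include <linux/minmax.h>
#include <linux/types.h>

/* All three operands are signed int, so the signedness check passes. */
static u8 scale_to_u8(int level)
{
	return clamp(level, 0, 255);
}

/*
 * clamp(len, 0, max_len) would trigger the signedness BUILD_BUG_ON here:
 * len is unsigned while max_len is a signed int that is not statically
 * known to be non-negative.  clamp_t() forces a single type instead
 * (the caller must ensure max_len >= 0).
 */
static size_t bounded_len(size_t len, int max_len)
{
	return clamp_t(size_t, len, 0, max_len);
}

/* clamp_val() reuses the type of @val (u8 here) for the limits. */
static u8 clamp_brightness(u8 raw)
{
	return clamp_val(raw, 16, 240);
}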
*/ -#define MIN(a,b) __cmp(min,a,b) -#define MAX(a,b) __cmp(max,a,b) -#define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) -#define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) +#define MIN(a, b) __cmp(min, a, b) +#define MAX(a, b) __cmp(max, a, b) +#define MIN_T(type, a, b) __cmp(min, (type)(a), (type)(b)) +#define MAX_T(type, a, b) __cmp(max, (type)(a), (type)(b)) #endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 49eef10c8e59..4bf261d41a6d 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -60,7 +60,6 @@ struct misc_cg { struct misc_res res[MISC_CG_RES_TYPES]; }; -u64 misc_cg_res_total_usage(enum misc_res_type type); int misc_cg_set_capacity(enum misc_res_type type, u64 capacity); int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount); void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount); @@ -104,11 +103,6 @@ static inline void put_misc_cg(struct misc_cg *cg) #else /* !CONFIG_CGROUP_MISC */ -static inline u64 misc_cg_res_total_usage(enum misc_res_type type) -{ - return 0; -} - static inline int misc_cg_set_capacity(enum misc_res_type type, u64 capacity) { return 0; diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index c0fea6ca5076..69e110c2b86a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -76,7 +76,7 @@ struct device; struct attribute_group; -struct miscdevice { +struct miscdevice { int minor; const char *name; const struct file_operations *fops; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 27f42f713c89..f016263e1fcf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1135,7 +1135,7 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, struct _rule_hw *eth_header); -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index cc647992f3d1..6822cfa5f4ad 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -538,6 +538,7 @@ struct mlx5_cmd_layout { }; enum mlx5_rfr_severity_bit_offsets { + MLX5_CRR_BIT_OFFSET = 0x6, MLX5_RFR_BIT_OFFSET = 0x7, }; @@ -1245,10 +1246,12 @@ enum mlx5_cap_type { MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, + MLX5_CAP_SHAMPO = 0x1d, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, MLX5_CAP_ADV_VIRTUALIZATION = 0x26, + MLX5_CAP_ADV_RDMA = 0x28, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1343,6 +1346,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, 
flow_table_properties_nic_transmit_rdma.cap) +#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) \ + MLX5_CAP_ADV_RDMA(mdev, rdma_transport_rx_flow_table_properties.cap) + +#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) \ + MLX5_CAP_ADV_RDMA(mdev, rdma_transport_tx_flow_table_properties.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) @@ -1382,6 +1391,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(adv_virtualization_cap, \ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap) +#define MLX5_CAP_ADV_RDMA(mdev, cap) \ + MLX5_GET(adv_rdma_cap, \ + mdev->caps.hca[MLX5_CAP_ADV_RDMA]->cur, cap) + #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) @@ -1470,6 +1483,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_MACSEC(mdev, cap)\ MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap) +#define MLX5_CAP_SHAMPO(mdev, cap) \ + MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, @@ -1501,6 +1517,7 @@ enum { MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP = 0x13, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, + MLX5_PHYSICAL_LAYER_RECOVERY_GROUP = 0x1a, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21, }; @@ -1516,8 +1533,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2 -#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1 +#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 6 +#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 4 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fed666c5bd16..e6ba8f4f4bd1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,6 @@ #include <linux/mlx5/doorbell.h> #include <linux/mlx5/eq.h> #include <linux/timecounter.h> -#include <linux/ptp_clock_kernel.h> #include <net/devlink.h> #define MLX5_ADEV_NAME "mlx5_core" @@ -160,9 +159,12 @@ enum { MLX5_REG_MIRC = 0x9162, MLX5_REG_MTPTM = 0x9180, MLX5_REG_MTCTR = 0x9181, + MLX5_REG_MRTCQ = 0x9182, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, + MLX5_REG_NIC_CAP = 0xC00D, MLX5_REG_DTOR = 0xC00E, + MLX5_REG_VHCA_ICM_CTRL = 0xC010, }; enum mlx5_qpts_trust_state { @@ -302,6 +304,8 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; struct semaphore throttle_sem; + struct semaphore unprivileged_sem; + struct xarray privileged_uids; } vars; enum mlx5_cmdif_state state; void *cmd_alloc_buf; @@ -394,6 +398,7 @@ struct mlx5_core_rsc_common { enum mlx5_res_type res; refcount_t refcount; struct completion free; + bool invalid; }; struct mlx5_uars_page { @@ -676,33 +681,8 @@ struct mlx5_rsvd_gids { struct ida ida; }; -#define MAX_PIN_NUM 8 -struct mlx5_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; - u64 min_npps_period; - u64 min_out_pulse_duration_ns; -}; - -struct mlx5_timer { - struct cyclecounter cycles; - struct timecounter tc; - u32 nominal_c_mult; - unsigned long overflow_period; -}; - -struct mlx5_clock { - struct mlx5_nb pps_nb; - seqlock_t lock; - struct hwtstamp_config hwtstamp_config; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - 
struct mlx5_pps pps_info; - struct mlx5_timer timer; -}; - +struct mlx5_clock; +struct mlx5_clock_dev_state; struct mlx5_dm; struct mlx5_fw_tracer; struct mlx5_vxlan; @@ -786,7 +766,8 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif - struct mlx5_clock clock; + struct mlx5_clock *clock; + struct mlx5_clock_dev_state *clock_state; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; struct mlx5_rsc_dump *rsc_dump; @@ -986,6 +967,8 @@ struct mlx5_async_work { mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ u16 op_mod; /* cmd op_mod */ + u8 throttle_locked:1; + u8 unpriv_locked:1; void *out; /* pointer to the cmd output buffer */ }; @@ -1016,6 +999,8 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); +int mlx5_cmd_add_privileged_uid(struct mlx5_core_dev *dev, u16 uid); +void mlx5_cmd_remove_privileged_uid(struct mlx5_core_dev *dev, u16 uid); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index df73a2ccc9af..67256e776566 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -147,6 +147,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, /* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */ #define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0) +#define ESW_IPSEC_RX_MAPPED_ID_MATCH_MASK \ + GENMASK(31 - ESW_RESERVED_BITS, ESW_ZONE_ID_BITS) u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 438db888bde0..939e58c2f386 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,9 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0 +#define MLX5_FS_MAX_POOL_SIZE BIT(30) + enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_NONE, MLX5_FLOW_DESTINATION_TYPE_VPORT, @@ -108,6 +111,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC, MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, + MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX, + MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX, }; enum { @@ -163,7 +168,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - u32 counter_id; + struct mlx5_fc *counter; struct { u16 num; u16 vhca_id; @@ -192,9 +197,9 @@ struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); struct mlx5_flow_namespace * -mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, - enum mlx5_flow_namespace_type type, - int vport); +mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, + int vport_idx); struct mlx5_flow_table_attr { int prio; @@ -202,6 +207,7 @@ struct mlx5_flow_table_attr { u32 level; u32 flags; u16 uid; + u16 vport; struct mlx5_flow_table *next_ft; struct { @@ -238,6 +244,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); struct mlx5_exe_aso { u32 object_id; + int base_id; u8 type; u8 return_reg_id; union { @@ -299,6 +306,8 @@ int 
mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); +struct mlx5_fc *mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size); +void mlx5_fc_local_destroy(struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 48d47181c7cd..2c09df4ee574 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1095,7 +1095,9 @@ struct mlx5_ifc_qos_cap_bits { u8 log_esw_max_sched_depth[0x4]; u8 reserved_at_10[0x10]; - u8 reserved_at_20[0xb]; + u8 reserved_at_20[0x9]; + u8 esw_cross_esw_sched[0x1]; + u8 reserved_at_2a[0x1]; u8 log_max_qos_nic_queue_group[0x5]; u8 reserved_at_30[0x10]; @@ -1103,7 +1105,8 @@ struct mlx5_ifc_qos_cap_bits { u8 packet_pacing_min_rate[0x20]; - u8 reserved_at_80[0x10]; + u8 reserved_at_80[0xb]; + u8 log_esw_max_rate_limit[0x5]; u8 packet_pacing_rate_table_size[0x10]; u8 esw_element_type[0x10]; @@ -1567,6 +1570,8 @@ enum { enum { MLX5_UCTX_CAP_RAW_TX = 1UL << 0, MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, + MLX5_UCTX_CAP_RDMA_CTRL = 1UL << 3, + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA = 1UL << 4, }; #define MLX5_FC_BULK_SIZE_FACTOR 128 @@ -1590,12 +1595,14 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, MLX5_STEERING_FORMAT_CONNECTX_7 = 2, + MLX5_STEERING_FORMAT_CONNECTX_8 = 3, }; struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x6]; u8 page_request_disable[0x1]; - u8 reserved_at_7[0x9]; + u8 abs_native_port_num[0x1]; + u8 reserved_at_8[0x8]; u8 shared_object_to_user_object_allowed[0x1]; u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; @@ -1825,7 +1832,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 regexp_params[0x1]; u8 uar_sz[0x6]; u8 port_selection_cap[0x1]; - u8 reserved_at_251[0x1]; + u8 nic_cap_reg[0x1]; u8 umem_uid_0[0x1]; u8 reserved_at_253[0x5]; u8 log_pg_sz[0x8]; @@ -1986,7 +1993,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_geneve_tlv_options[0x8]; u8 reserved_at_568[0x3]; u8 max_geneve_tlv_option_data_len[0x5]; - u8 reserved_at_570[0x9]; + u8 reserved_at_570[0x1]; + u8 adv_rdma[0x1]; + u8 reserved_at_572[0x7]; u8 adv_virtualization[0x1]; u8 reserved_at_57a[0x6]; @@ -2135,7 +2144,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; - u8 reserved_at_1c0[0x60]; + u8 general_obj_types_127_64[0x40]; + u8 reserved_at_200[0x20]; u8 reserved_at_220[0x1]; u8 sw_vhca_id_valid[0x1]; @@ -2324,7 +2334,9 @@ struct mlx5_ifc_wq_bits { u8 headers_mkey[0x20]; u8 shampo_enable[0x1]; - u8 reserved_at_1e1[0x4]; + u8 reserved_at_1e1[0x1]; + u8 shampo_mode[0x2]; + u8 reserved_at_1e4[0x1]; u8 log_reservation_size[0x3]; u8 reserved_at_1e8[0x5]; u8 log_max_num_of_packets_per_reservation[0x3]; @@ -2633,6 +2645,12 @@ struct mlx5_ifc_field_select_802_1qau_rp_bits { u8 field_select_8021qaurp[0x20]; }; +struct mlx5_ifc_phys_layer_recovery_cntrs_bits { + u8 total_successful_recovery_events[0x20]; + + u8 reserved_at_20[0x7a0]; +}; + struct mlx5_ifc_phys_layer_cntrs_bits { u8 time_since_last_clear_high[0x20]; @@ -3322,6 +3340,14 @@ struct mlx5_ifc_dropped_packet_logged_bits { u8 reserved_at_0[0xe0]; }; +struct mlx5_ifc_nic_cap_reg_bits { + u8 reserved_at_0[0x1a]; + u8 vhca_icm_ctrl[0x1]; + u8 reserved_at_1b[0x5]; + + u8 reserved_at_20[0x60]; 
+}; + struct mlx5_ifc_default_timeout_bits { u8 to_multiplier[0x3]; u8 reserved_at_3[0x9]; @@ -3358,6 +3384,18 @@ struct mlx5_ifc_dtor_reg_bits { u8 reserved_at_1c0[0x20]; }; +struct mlx5_ifc_vhca_icm_ctrl_reg_bits { + u8 vhca_id_valid[0x1]; + u8 reserved_at_1[0xf]; + u8 vhca_id[0x10]; + + u8 reserved_at_20[0xa0]; + + u8 cur_alloc_icm[0x20]; + + u8 reserved_at_e0[0x120]; +}; + enum { MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, @@ -3696,6 +3734,22 @@ struct mlx5_ifc_crypto_cap_bits { u8 reserved_at_80[0x780]; }; +struct mlx5_ifc_shampo_cap_bits { + u8 reserved_at_0[0x3]; + u8 shampo_log_max_reservation_size[0x5]; + u8 reserved_at_8[0x3]; + u8 shampo_log_min_reservation_size[0x5]; + u8 shampo_min_mss_size[0x10]; + + u8 shampo_header_split[0x1]; + u8 shampo_header_split_data_merge[0x1]; + u8 reserved_at_22[0x1]; + u8 shampo_log_max_headers_entry_size[0x5]; + u8 reserved_at_28[0x18]; + + u8 reserved_at_40[0x7c0]; +}; + union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2; @@ -4105,6 +4159,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT = 0x5, }; enum { @@ -4113,34 +4168,41 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, + ELEMENT_TYPE_CAP_MASK_RATE_LIMIT = 1 << 5, }; enum { TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, + TSAR_ELEMENT_TSAR_TYPE_TC_ARB = 0x3, }; enum { TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, TSAR_TYPE_CAP_MASK_ETS = 1 << 2, + TSAR_TYPE_CAP_MASK_TC_ARB = 1 << 3, }; struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; + u8 traffic_class[0x4]; + u8 reserved_at_4[0x4]; u8 tsar_type[0x8]; u8 reserved_at_10[0x10]; }; struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; + u8 reserved_at_0[0x4]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; struct mlx5_ifc_vport_tc_element_bits { u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; @@ -4165,7 +4227,9 @@ struct mlx5_ifc_scheduling_context_bits { u8 max_average_bw[0x20]; - u8 reserved_at_e0[0x120]; + u8 max_bw_obj_id[0x20]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_rqtc_bits { @@ -4788,6 +4852,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits ib_ext_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; + struct mlx5_ifc_phys_layer_recovery_cntrs_bits phys_layer_recovery_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -6326,6 +6391,20 @@ struct mlx5_ifc_modify_other_hca_cap_in_bits { struct mlx5_ifc_other_hca_cap_bits other_capability; }; +struct mlx5_ifc_sw_owner_icm_root_params_bits { + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; +}; + +struct mlx5_ifc_rtc_params_bits { + u8 rtc_id_0[0x20]; + + u8 rtc_id_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; @@ -6344,20 +6423,10 @@ struct mlx5_ifc_flow_table_context_bits { u8 
lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; - union { - struct { - u8 sw_owner_icm_root_1[0x40]; - u8 sw_owner_icm_root_0[0x40]; - } sws; - struct { - u8 rtc_id_0[0x20]; - - u8 rtc_id_1[0x20]; - - u8 reserved_at_100[0x40]; - - } hws; + union { + struct mlx5_ifc_sw_owner_icm_root_params_bits sws; + struct mlx5_ifc_rtc_params_bits hws; }; }; @@ -7006,6 +7075,7 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits { enum { MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START = 0x1, + MLX5_REFORMAT_CONTEXT_ANCHOR_VLAN_START = 0x2, MLX5_REFORMAT_CONTEXT_ANCHOR_IP_START = 0x7, MLX5_REFORMAT_CONTEXT_ANCHOR_TCP_UDP_START = 0x9, }; @@ -10133,7 +10203,21 @@ struct mlx5_ifc_pplm_reg_bits { u8 fec_override_admin_200g_2x[0x10]; u8 fec_override_admin_100g_1x[0x10]; - u8 reserved_at_260[0x20]; + u8 reserved_at_260[0x60]; + + u8 fec_override_cap_1600g_8x[0x10]; + u8 fec_override_cap_800g_4x[0x10]; + + u8 fec_override_cap_400g_2x[0x10]; + u8 fec_override_cap_200g_1x[0x10]; + + u8 fec_override_admin_1600g_8x[0x10]; + u8 fec_override_admin_800g_4x[0x10]; + + u8 fec_override_admin_400g_2x[0x10]; + u8 fec_override_admin_200g_1x[0x10]; + + u8 reserved_at_340[0x80]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -10507,7 +10591,11 @@ struct mlx5_ifc_mtutc_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x48]; + u8 reserved_at_0[0x10]; + u8 ppcnt_recovery_counters[0x1]; + u8 reserved_at_11[0xc]; + u8 fec_200G_per_lane_in_pplm[0x1]; + u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; u8 reserved_at_49[0x1f]; u8 fec_50G_per_lane_in_pplm[0x1]; @@ -10647,7 +10735,8 @@ struct mlx5_ifc_mcam_access_reg_bits3 { u8 regs_63_to_32[0x20]; - u8 regs_31_to_2[0x1e]; + u8 regs_31_to_3[0x1d]; + u8 mrtcq[0x1]; u8 mtctr[0x1]; u8 mtptm[0x1]; }; @@ -11044,6 +11133,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_TRUST_LOCKDOWN_ERR = 0x13, }; struct mlx5_ifc_initial_seg_bits { @@ -12419,12 +12509,17 @@ enum { }; enum { + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(0x13), +}; + +enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL = 0x53, MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; @@ -12992,6 +13087,44 @@ struct mlx5_ifc_load_vhca_state_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_adv_rdma_cap_bits { + u8 rdma_transport_manager[0x1]; + u8 rdma_transport_manager_other_eswitch[0x1]; + u8 reserved_at_2[0x1e]; + + u8 rcx_type[0x8]; + u8 reserved_at_28[0x2]; + u8 ps_entry_log_max_value[0x6]; + u8 reserved_at_30[0x6]; + u8 qp_max_ps_num_entry[0xa]; + + u8 mp_max_num_queues[0x8]; + u8 ps_user_context_max_log_size[0x8]; + u8 message_based_qp_and_striding_wq[0x8]; + u8 reserved_at_58[0x8]; + + u8 max_receive_send_message_size_stride[0x10]; + u8 reserved_at_70[0x10]; + + u8 max_receive_send_message_size_byte[0x20]; + + u8 reserved_at_a0[0x160]; + + struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_rx_flow_table_properties; + + struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_tx_flow_table_properties; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits 
rdma_transport_tx_ft_field_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_bitmask_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_bitmask_support_2; + + u8 reserved_at_800[0x3800]; +}; + struct mlx5_ifc_adv_virtualization_cap_bits { u8 reserved_at_0[0x3]; u8 pg_track_log_max_num[0x5]; @@ -13138,4 +13271,12 @@ struct mlx5_ifc_msees_reg_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_mrtcq_reg_bits { + u8 reserved_at_0[0x40]; + + u8 rt_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e68d42b8ce65..58770b86f793 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -61,15 +61,6 @@ enum mlx5_an_status { #define MLX5_EEPROM_PAGE_LENGTH 256 #define MLX5_EEPROM_HIGH_PAGE_LENGTH 128 -struct mlx5_module_eeprom_query_params { - u16 size; - u16 offset; - u16 i2c_address; - u32 page; - u32 bank; - u32 module_number; -}; - enum mlx5e_link_mode { MLX5E_1000BASE_CX_SGMII = 0, MLX5E_1000BASE_KX = 1, @@ -115,9 +106,12 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, + MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14, MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, + MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20, MLX5E_EXT_LINK_MODES_NUMBER, }; @@ -142,12 +136,6 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -struct mlx5_port_eth_proto { - u32 cap; - u32 admin; - u32 oper; -}; - #define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? 
MLX5_GET(reg, out, ext_##field) : \ @@ -160,14 +148,7 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, u16 *proto_oper, u8 local_port, u8 plane_index); -void mlx5_toggle_port_link(struct mlx5_core_dev *dev); -int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status status); -int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status *status); -int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); - -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); + void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); @@ -175,65 +156,4 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); -int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); -int mlx5_query_port_pause(struct mlx5_core_dev *dev, - u32 *rx_pause, u32 *tx_pause); - -int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); -int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, - u8 *pfc_en_rx); - -int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, - u16 stall_critical_watermark, - u16 stall_minor_watermark); -int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, - u16 *stall_critical_watermark, u16 *stall_minor_watermark); - -int mlx5_max_tc(struct mlx5_core_dev *mdev); - -int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); -int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, - u8 prio, u8 *tc); -int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); -int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, - u8 tc, u8 *tc_group); -int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); -int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, - u8 tc, u8 *bw_pct); -int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, - u8 *max_bw_value, - u8 *max_bw_unit); -int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, - u8 *max_bw_value, - u8 *max_bw_unit); -int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); -int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); - -int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen); -int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen); -int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); -void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, - bool *enabled); -int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, - u16 offset, u16 size, u8 *data); -int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, - struct mlx5_module_eeprom_query_params *params, u8 *data); - -int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); -int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); - -int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); -int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); -int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); -int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); - -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5_port_eth_proto *eproto); -bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); -u32 
mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy); -u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy); -int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); - #endif /* __MLX5_PORT_H__ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index b1c3db9cf355..e51dba8398f7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,7 @@ #include <linux/memremap.h> #include <linux/slab.h> #include <linux/cacheinfo.h> +#include <linux/rcuwait.h> struct mempolicy; struct anon_vma; @@ -40,22 +41,10 @@ struct user_struct; struct pt_regs; struct folio_batch; -extern int sysctl_page_lock_unfairness; - +void arch_mm_preinit(void); void mm_core_init(void); void init_mm_internals(void); -#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */ -extern unsigned long max_mapnr; - -static inline void set_max_mapnr(unsigned long limit) -{ - max_mapnr = limit; -} -#else -static inline void set_max_mapnr(unsigned long limit) { } -#endif - extern atomic_long_t _totalram_pages; static inline unsigned long totalram_pages(void) { @@ -78,8 +67,6 @@ static inline void totalram_pages_add(long count) } extern void * high_memory; -extern int page_cluster; -extern const int page_cluster_max; #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; @@ -209,17 +196,6 @@ extern int sysctl_max_map_count; extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; -extern int sysctl_overcommit_memory; -extern int sysctl_overcommit_ratio; -extern unsigned long sysctl_overcommit_kbytes; - -int overcommit_ratio_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); -int overcommit_kbytes_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); -int overcommit_policy_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); - #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) @@ -257,8 +233,6 @@ void setup_initial_init_mm(void *start_code, void *end_code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); -/* Use only if VMA has no other users */ -void __vm_area_free(struct vm_area_struct *vma); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; @@ -411,7 +385,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE @@ -697,13 +671,57 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {} #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_PER_VMA_LOCK +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + static struct lock_class_key lockdep_key; + + lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0); +#endif + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); + vma->vm_lock_seq = UINT_MAX; +} + +static inline bool is_vma_writer_only(int refcnt) +{ + /* + * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma + * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. 
Waiting on + * a detached vma happens only in vma_mark_detached() and is a rare + * case, therefore most of the time there will be no unnecessary wakeup. + */ + return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1; +} + +static inline void vma_refcount_put(struct vm_area_struct *vma) +{ + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *mm = vma->vm_mm; + int oldcnt; + + rwsem_release(&vma->vmlock_dep_map, _RET_IP_); + if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) { + + if (is_vma_writer_only(oldcnt - 1)) + rcuwait_wake_up(&mm->vma_writer_wait); + } +} + /* * Try to read-lock a vma. The function is allowed to occasionally yield false * locked result to avoid performance overhead, in which case we fall back to * using mmap_lock. The function should never yield false unlocked result. + * False locked result is possible if mm_lock_seq overflows or if vma gets + * reused and attached to a different mm before we lock it. + * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got + * detached. */ -static inline bool vma_start_read(struct vm_area_struct *vma) +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) { + int oldcnt; + /* * Check before locking. A race might cause false locked result. * We can use READ_ONCE() for the mm_lock_seq here, and don't need @@ -711,16 +729,26 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * we don't rely on for anything - the mm_lock_seq read against which we * need ordering is below. */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) - return false; + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) + return NULL; - if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) - return false; + /* + * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() + * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. + * Acquire fence is required here to avoid reordering against later + * vm_lock_seq check and checks inside lock_vma_under_rcu(). + */ + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) { + /* return EAGAIN if vma got detached from under us */ + return oldcnt ? NULL : ERR_PTR(-EAGAIN); + } + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); /* - * Overflow might produce false locked result. + * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq + * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq * modification invalidates all existing locks. * * We must use ACQUIRE semantics for the mm_lock_seq so that if we are @@ -728,22 +756,51 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * after it has been unlocked. * This pairs with RELEASE semantics in vma_end_write_all(). */ - if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { - up_read(&vma->vm_lock->lock); - return false; + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { + vma_refcount_put(vma); + return NULL; } + + return vma; +} + +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. 
+ * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) +{ + int oldcnt; + + mmap_assert_locked(vma->vm_mm); + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) + return false; + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); return true; } +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline bool vma_start_read_locked(struct vm_area_struct *vma) +{ + return vma_start_read_locked_nested(vma, 0); +} + static inline void vma_end_read(struct vm_area_struct *vma) { - rcu_read_lock(); /* keeps vma alive till the end of up_read */ - up_read(&vma->vm_lock->lock); - rcu_read_unlock(); + vma_refcount_put(vma); } /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); @@ -751,10 +808,12 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) * current task is holding mmap_write_lock, both vma->vm_lock_seq and * mm->mm_lock_seq can't be concurrently modified. */ - *mm_lock_seq = vma->vm_mm->mm_lock_seq; + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; return (vma->vm_lock_seq == *mm_lock_seq); } +void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq); + /* * Begin writing to a VMA. * Exclude concurrent readers under the per-VMA lock until the currently @@ -762,43 +821,53 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) */ static inline void vma_start_write(struct vm_area_struct *vma) { - int mm_lock_seq; + unsigned int mm_lock_seq; if (__is_vma_write_locked(vma, &mm_lock_seq)) return; - down_write(&vma->vm_lock->lock); - /* - * We should use WRITE_ONCE() here because we can have concurrent reads - * from the early lockless pessimistic check in vma_start_read(). - * We don't really care about the correctness of that early check, but - * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. - */ - WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock->lock); + __vma_start_write(vma, mm_lock_seq); } static inline void vma_assert_write_locked(struct vm_area_struct *vma) { - int mm_lock_seq; + unsigned int mm_lock_seq; VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); } static inline void vma_assert_locked(struct vm_area_struct *vma) { - if (!rwsem_is_locked(&vma->vm_lock->lock)) - vma_assert_write_locked(vma); + unsigned int mm_lock_seq; + + VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 && + !__is_vma_write_locked(vma, &mm_lock_seq), vma); +} + +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. 
+ */ +static inline void vma_assert_attached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); } -static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) +static inline void vma_mark_attached(struct vm_area_struct *vma) { - /* When detaching vma should be write-locked */ - if (detached) - vma_assert_write_locked(vma); - vma->detached = detached; + vma_assert_write_locked(vma); + vma_assert_detached(vma); + refcount_set_release(&vma->vm_refcnt, 1); } +void vma_mark_detached(struct vm_area_struct *vma); + static inline void release_fault_lock(struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) @@ -820,14 +889,18 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, #else /* CONFIG_PER_VMA_LOCK */ -static inline bool vma_start_read(struct vm_area_struct *vma) - { return false; } +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) + { return NULL; } static inline void vma_end_read(struct vm_area_struct *vma) {} static inline void vma_start_write(struct vm_area_struct *vma) {} static inline void vma_assert_write_locked(struct vm_area_struct *vma) { mmap_assert_write_locked(vma->vm_mm); } -static inline void vma_mark_detached(struct vm_area_struct *vma, - bool detached) {} +static inline void vma_assert_attached(struct vm_area_struct *vma) {} +static inline void vma_assert_detached(struct vm_area_struct *vma) {} +static inline void vma_mark_attached(struct vm_area_struct *vma) {} +static inline void vma_mark_detached(struct vm_area_struct *vma) {} static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address) @@ -854,18 +927,13 @@ static inline void assert_fault_locked(struct vm_fault *vmf) extern const struct vm_operations_struct vma_dummy_vm_ops; -/* - * WARNING: vma_init does not initialize vma->vm_lock. - * Use vm_area_alloc()/vm_area_free() if vma needs locking. - */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_mark_detached(vma, false); - vma_numab_state_init(vma); + vma_lock_init(vma, false); } /* Use when VMA is not part of the VMA tree and needs no locking */ @@ -1058,6 +1126,7 @@ static inline int vma_iter_bulk_store(struct vma_iterator *vmi, if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; + vma_mark_attached(vma); return 0; } @@ -1098,6 +1167,25 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +extern void prep_compound_page(struct page *page, unsigned int order); + +static inline unsigned int folio_large_order(const struct folio *folio) +{ + return folio->_flags_1 & 0xff; +} + +#ifdef NR_PAGES_IN_LARGE_FOLIO +static inline long folio_large_nr_pages(const struct folio *folio) +{ + return folio->_nr_pages; +} +#else +static inline long folio_large_nr_pages(const struct folio *folio) +{ + return 1L << folio_large_order(folio); +} +#endif + /* * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. 
These callers should be @@ -1111,7 +1199,7 @@ static inline unsigned int compound_order(struct page *page) if (!test_bit(PG_head, &folio->flags)) return 0; - return folio->_flags_1 & 0xff; + return folio_large_order(folio); } /** @@ -1127,7 +1215,24 @@ static inline unsigned int folio_order(const struct folio *folio) { if (!folio_test_large(folio)) return 0; - return folio->_flags_1 & 0xff; + return folio_large_order(folio); +} + +/** + * folio_reset_order - Reset the folio order and derived _nr_pages + * @folio: The folio. + * + * Reset the order and derived _nr_pages to 0. Must only be used in the + * process of splitting large folios. + */ +static inline void folio_reset_order(struct folio *folio) +{ + if (WARN_ON_ONCE(!folio_test_large(folio))) + return; + folio->_flags_1 &= ~0xffUL; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif } #include <linux/huge_mm.h> @@ -1220,6 +1325,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline int folio_entire_mapcount(const struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == 1)) + return 0; return atomic_read(&folio->_entire_mapcount) + 1; } @@ -1419,25 +1526,6 @@ vm_fault_t finish_fault(struct vm_fault *vmf); * back into memory. */ -#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) -DECLARE_STATIC_KEY_FALSE(devmap_managed_key); - -bool __put_devmap_managed_folio_refs(struct folio *folio, int refs); -static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - if (!static_branch_unlikely(&devmap_managed_key)) - return false; - if (!folio_is_zone_device(folio)) - return false; - return __put_devmap_managed_folio_refs(folio, refs); -} -#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - return false; -} -#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ - /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) @@ -1458,7 +1546,10 @@ static inline void folio_get(struct folio *folio) static inline void get_page(struct page *page) { - folio_get(page_folio(page)); + struct folio *folio = page_folio(page); + if (WARN_ON_ONCE(folio_test_slab(folio))) + return; + folio_get(folio); } static inline __must_check bool try_get_page(struct page *page) @@ -1552,12 +1643,9 @@ static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); - /* - * For some devmap managed pages we need to catch refcount transition - * from 2 to 1: - */ - if (put_devmap_managed_folio_refs(folio, 1)) + if (folio_test_slab(folio)) return; + folio_put(folio); } @@ -1916,6 +2004,13 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } +static inline bool folio_has_pincount(const struct folio *folio) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return folio_test_large(folio); + return folio_order(folio) > 1; +} + /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. @@ -1932,7 +2027,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) * get that many refcounts, and b) all the callers of this routine are * expected to be able to deal gracefully with a false positive. * - * For large folios, the result will be exactly correct. That's because + * For most large folios, the result will be exactly correct. 
That's because * we have more tracking data available: the _pincount field is used * instead of the GUP_PIN_COUNTING_BIAS scheme. * @@ -1943,7 +2038,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) */ static inline bool folio_maybe_dma_pinned(struct folio *folio) { - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) return atomic_read(&folio->_pincount) > 0; /* @@ -2015,6 +2110,13 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio) if (folio_is_device_coherent(folio)) return false; + /* + * Filesystems can only tolerate transient delays to truncate and + * hole-punch operations + */ + if (folio_is_fsdax(folio)) + return false; + /* Otherwise, non-movable zone folios can be pinned. */ return !folio_is_zone_movable(folio); @@ -2058,11 +2160,7 @@ static inline long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; -#ifdef CONFIG_64BIT - return folio->_folio_nr_pages; -#else - return 1L << (folio->_flags_1 & 0xff); -#endif + return folio_large_nr_pages(folio); } /* Only hugetlbfs can allocate folios larger than MAX_ORDER */ @@ -2077,24 +2175,20 @@ static inline long folio_nr_pages(const struct folio *folio) * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. */ -static inline unsigned long compound_nr(struct page *page) +static inline long compound_nr(struct page *page) { struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags)) return 1; -#ifdef CONFIG_64BIT - return folio->_folio_nr_pages; -#else - return 1L << (folio->_flags_1 & 0xff); -#endif + return folio_large_nr_pages(folio); } /** * thp_nr_pages - The number of regular pages in this huge page. * @page: The head page of a huge page. */ -static inline int thp_nr_pages(struct page *page) +static inline long thp_nr_pages(struct page *page) { return folio_nr_pages((struct folio *)page); } @@ -2149,23 +2243,18 @@ static inline size_t folio_size(const struct folio *folio) } /** - * folio_likely_mapped_shared - Estimate if the folio is mapped into the page - * tables of more than one MM + * folio_maybe_mapped_shared - Whether the folio is mapped into the page + * tables of more than one MM * @folio: The folio. * - * This function checks if the folio is currently mapped into more than one - * MM ("mapped shared"), or if the folio is only mapped into a single MM - * ("mapped exclusively"). + * This function checks if the folio maybe currently mapped into more than one + * MM ("maybe mapped shared"), or if the folio is certainly mapped into a single + * MM ("mapped exclusively"). * * For KSM folios, this function also returns "mapped shared" when a folio is * mapped multiple times into the same MM, because the individual page mappings * are independent. * - * As precise information is not easily available for all folios, this function - * estimates the number of MMs ("sharers") that are currently mapping a folio - * using the number of times the first page of the folio is currently mapped - * into page tables. - * * For small anonymous folios and anonymous hugetlb folios, the return * value will be exactly correct: non-KSM folios can only be mapped at most once * into an MM, and they cannot be partially mapped. KSM folios are @@ -2173,8 +2262,8 @@ static inline size_t folio_size(const struct folio *folio) * * For other folios, the result can be fuzzy: * #. 
For partially-mappable large folios (THP), the return value can wrongly - * indicate "mapped exclusively" (false negative) when the folio is - * only partially mapped into at least one MM. + * indicate "mapped shared" (false positive) if a folio was mapped by + * more than two MMs at one point in time. * #. For pagecache folios (including hugetlb), the return value can wrongly * indicate "mapped shared" (false positive) when two VMAs in the same MM * cover the same file range. @@ -2191,7 +2280,7 @@ static inline size_t folio_size(const struct folio *folio) * * Return: Whether the folio is estimated to be mapped into more than one MM. */ -static inline bool folio_likely_mapped_shared(struct folio *folio) +static inline bool folio_maybe_mapped_shared(struct folio *folio) { int mapcount = folio_mapcount(folio); @@ -2199,16 +2288,22 @@ static inline bool folio_likely_mapped_shared(struct folio *folio) if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio))) return mapcount > 1; - /* A single mapping implies "mapped exclusively". */ - if (mapcount <= 1) - return false; - - /* If any page is mapped more than once we treat it "mapped shared". */ - if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio)) + /* + * vm_insert_page() without CONFIG_TRANSPARENT_HUGEPAGE ... + * simply assume "mapped shared", nobody should really care + * about this for arbitrary kernel allocations. + */ + if (!IS_ENABLED(CONFIG_MM_ID)) return true; - /* Let's guess based on the first subpage. */ - return atomic_read(&folio->_mapcount) > 0; + /* + * A single mapping implies "mapped exclusively", even if the + * folio flag says something different: it's easier to handle this + * case here instead of on the RMAP hot path. + */ + if (mapcount <= 1) + return false; + return folio_test_large_maybe_mapped_shared(folio); } #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE @@ -2320,6 +2415,7 @@ extern void pagefault_out_of_memory(void); struct zap_details { struct folio *single_folio; /* Locked folio to be unmapped */ bool even_cows; /* Zap COWed private pages too? */ + bool reclaim_pt; /* Need reclaim page tables? 
*/ zap_flags_t zap_flags; /* Extra flags for zapping */ }; @@ -2416,11 +2512,13 @@ struct follow_pfnmap_args { * Outputs: * * @pfn: the PFN of the address + * @addr_mask: address mask covering pfn * @pgprot: the pgprot_t of the mapping * @writable: whether the mapping is writable * @special: whether the mapping is a special mapping (real PFN maps) */ unsigned long pfn; + unsigned long addr_mask; pgprot_t pgprot; bool writable; bool special; @@ -2485,6 +2583,11 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); +#ifdef CONFIG_BPF_SYSCALL +extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); +#endif + long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, @@ -2548,7 +2651,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim); struct kvec; -struct page *get_dump_page(unsigned long addr); +struct page *get_dump_page(unsigned long addr, int *locked); bool folio_mark_dirty(struct folio *folio); bool folio_mark_dirty_lock(struct folio *folio); @@ -2991,18 +3094,15 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ -static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) +static inline void __pagetable_ctor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); - if (!ptlock_init(ptdesc)) - return false; __folio_set_pgtable(folio); lruvec_stat_add_folio(folio, NR_PAGETABLE); - return true; } -static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) +static inline void pagetable_dtor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); @@ -3011,6 +3111,20 @@ static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) lruvec_stat_sub_folio(folio, NR_PAGETABLE); } +static inline void pagetable_dtor_free(struct ptdesc *ptdesc) +{ + pagetable_dtor(ptdesc); + pagetable_free(ptdesc); +} + +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) +{ + if (!ptlock_init(ptdesc)) + return false; + __pagetable_ctor(ptdesc); + return true; +} + pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) @@ -3087,14 +3201,6 @@ static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) return ptlock_init(ptdesc); } -static inline void pmd_ptlock_free(struct ptdesc *ptdesc) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); -#endif - ptlock_free(ptdesc); -} - #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) #else @@ -3105,7 +3211,6 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) } static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } -static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -3120,25 +3225,13 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); - if (!pmd_ptlock_init(ptdesc)) return false; - __folio_set_pgtable(folio); ptdesc_pmd_pts_init(ptdesc); - lruvec_stat_add_folio(folio, 
NR_PAGETABLE); + __pagetable_ctor(ptdesc); return true; } -static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) -{ - struct folio *folio = ptdesc_folio(ptdesc); - - pmd_ptlock_free(ptdesc); - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); -} - /* * No scalability reason to split PUD locks yet, but follow the same pattern * as the PMD locks to make it easier if we decide to. The VM should not be @@ -3160,18 +3253,17 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); - - __folio_set_pgtable(folio); - lruvec_stat_add_folio(folio, NR_PAGETABLE); + __pagetable_ctor(ptdesc); } -static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +static inline void pagetable_p4d_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + __pagetable_ctor(ptdesc); +} - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); +static inline void pagetable_pgd_ctor(struct ptdesc *ptdesc) +{ + __pagetable_ctor(ptdesc); } extern void __init pagecache_init(void); @@ -3193,7 +3285,6 @@ extern void reserve_bootmem_region(phys_addr_t start, /* Free the reserved page into the buddy system, so it gets managed. */ void free_reserved_page(struct page *page); -#define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) { @@ -3324,6 +3415,8 @@ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admi extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); +bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, bool write); static inline int check_data_rlimit(unsigned long rlim, unsigned long new, @@ -3371,9 +3464,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, return __get_unmapped_area(file, addr, len, pgoff, flags, 0); } -extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, - struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, @@ -3437,9 +3527,6 @@ extern unsigned long stack_guard_gap; int expand_stack_locked(struct vm_area_struct *vma, unsigned long address); struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr); -/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ -int expand_downwards(struct vm_area_struct *vma, unsigned long address); - /* Look up the first VMA which satisfies addr < vm_end, NULL if none. 
*/ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, @@ -3557,6 +3644,8 @@ int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, unsigned long num); +vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page, + bool write); vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, @@ -3817,12 +3906,6 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); -#ifdef CONFIG_SYSCTL -extern int sysctl_drop_caches; -int drop_caches_sysctl_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); -#endif - void drop_slab(void); #ifndef CONFIG_MMU @@ -3841,6 +3924,7 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) #endif void *sparse_buffer_alloc(unsigned long size); +unsigned long section_map_size(void); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); @@ -3849,7 +3933,8 @@ p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, - struct vmem_altmap *altmap, struct page *reuse); + struct vmem_altmap *altmap, unsigned long ptpfn, + unsigned long flags); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, @@ -3865,6 +3950,12 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); +int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); +void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end, @@ -3931,9 +4022,6 @@ static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, } #endif -void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, - unsigned long nr_pages); - enum mf_flags { MF_COUNT_INCREASED = 1 << 0, MF_ACTION_REQUIRED = 1 << 1, @@ -4094,69 +4182,6 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr); #endif -extern int sysctl_nr_trim_pages; - -#ifdef CONFIG_PRINTK -void mem_dump_obj(void *object); -#else -static inline void mem_dump_obj(void *object) {} -#endif - -static inline bool is_write_sealed(int seals) -{ - return seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE); -} - -/** - * is_readonly_sealed - Checks whether write-sealed but mapped read-only, - * in which case writes should be disallowing moving - * forwards. 
- * @seals: the seals to check - * @vm_flags: the VMA flags to check - * - * Returns whether readonly sealed, in which case writess should be disallowed - * going forward. - */ -static inline bool is_readonly_sealed(int seals, vm_flags_t vm_flags) -{ - /* - * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as - * MAP_SHARED and read-only, take care to not allow mprotect to - * revert protections on such mappings. Do this only for shared - * mappings. For private mappings, don't need to mask - * VM_MAYWRITE as we still want them to be COW-writable. - */ - if (is_write_sealed(seals) && - ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_SHARED)) - return true; - - return false; -} - -/** - * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and - * handle them. - * @seals: the seals to check - * @vma: the vma to operate on - * - * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper - * check/handling on the vma flags. Return 0 if check pass, or <0 for errors. - */ -static inline int seal_check_write(int seals, struct vm_area_struct *vma) -{ - if (!is_write_sealed(seals)) - return 0; - - /* - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * write seals are active. - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return -EPERM; - - return 0; -} - #ifdef CONFIG_ANON_VMA_NAME int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, unsigned long len_in, @@ -4197,6 +4222,7 @@ void vma_pgtable_walk_begin(struct vm_area_struct *vma); void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +int reserve_mem_release_by_name(const char *name); #ifdef CONFIG_64BIT int do_mseal(unsigned long start, size_t len_in, unsigned long flags); @@ -4229,4 +4255,72 @@ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + +/* + * mseal of userspace process's system mappings. + */ +#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS +#define VM_SEALED_SYSMAP VM_SEALED +#else +#define VM_SEALED_SYSMAP VM_NONE +#endif + +/* + * DMA mapping IDs for page_pool + * + * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot + * mistake a valid kernel pointer with any of the values we write into this + * field. + * + * On architectures that set POISON_POINTER_DELTA, this is already ensured, + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the + * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is + * 0, we make sure that we leave the two topmost bits empty, as that guarantees + * we won't mistake a valid kernel pointer for a value we set, regardless of the + * VMSPLIT setting. 
+ * + * Altogether, this means that the number of bits available is constrained by + * the size of an unsigned long (at the upper end, subtracting two bits per the + * above), and the definition of PP_SIGNATURE (with or without + * POISON_POINTER_DELTA). + */ +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) +#if POISON_POINTER_DELTA > 0 +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA + * index to not overlap with that if set + */ +#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) +#else +/* Always leave out the topmost two; see above. */ +#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +#endif + +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ + PP_DMA_INDEX_SHIFT) + +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. + */ +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(struct page *page) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 1b6a917fffa4..f9157a0c42a5 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -133,31 +133,25 @@ static inline int lru_hist_from_seq(unsigned long seq) return seq % NR_HIST_GENS; } -static inline int lru_tier_from_refs(int refs) +static inline int lru_tier_from_refs(int refs, bool workingset) { VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH)); - /* see the comment in folio_lru_refs() */ - return order_base_2(refs + 1); + /* see the comment on MAX_NR_TIERS */ + return workingset ? MAX_NR_TIERS - 1 : order_base_2(refs); } static inline int folio_lru_refs(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); - bool workingset = flags & BIT(PG_workingset); + if (!(flags & BIT(PG_referenced))) + return 0; /* - * Return the number of accesses beyond PG_referenced, i.e., N-1 if the - * total number of accesses is N>1, since N=0,1 both map to the first - * tier. lru_tier_from_refs() will account for this off-by-one. Also see - * the comment on MAX_NR_TIERS. + * Return the total number of accesses including PG_referenced. Also see + * the comment on LRU_REFS_FLAGS. 
*/ - return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; -} - -static inline void folio_clear_lru_refs(struct folio *folio) -{ - set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); + return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1; } static inline int folio_lru_gen(struct folio *folio) @@ -223,11 +217,43 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } +static inline unsigned long lru_gen_folio_seq(struct lruvec *lruvec, struct folio *folio, + bool reclaiming) +{ + int gen; + int type = folio_is_file_lru(folio); + struct lru_gen_folio *lrugen = &lruvec->lrugen; + + /* + * +-----------------------------------+-----------------------------------+ + * | Accessed through page tables and | Accessed through file descriptors | + * | promoted by folio_update_gen() | and protected by folio_inc_gen() | + * +-----------------------------------+-----------------------------------+ + * | PG_active (set while isolated) | | + * +-----------------+-----------------+-----------------+-----------------+ + * | PG_workingset | PG_referenced | PG_workingset | LRU_REFS_FLAGS | + * +-----------------------------------+-----------------------------------+ + * |<---------- MIN_NR_GENS ---------->| | + * |<---------------------------- MAX_NR_GENS ---------------------------->| + */ + if (folio_test_active(folio)) + gen = MIN_NR_GENS - folio_test_workingset(folio); + else if (reclaiming) + gen = MAX_NR_GENS; + else if ((!folio_is_file_lru(folio) && !folio_test_swapcache(folio)) || + (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio)))) + gen = MIN_NR_GENS; + else + gen = MAX_NR_GENS - folio_test_workingset(folio); + + return max(READ_ONCE(lrugen->max_seq) - gen + 1, READ_ONCE(lrugen->min_seq[type])); +} + static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { unsigned long seq; unsigned long flags; - unsigned long mask; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -237,40 +263,12 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; - /* - * There are four common cases for this page: - * 1. If it's hot, i.e., freshly faulted in, add it to the youngest - * generation, and it's protected over the rest below. - * 2. If it can't be evicted immediately, i.e., a dirty page pending - * writeback, add it to the second youngest generation. - * 3. If it should be evicted first, e.g., cold and clean from - * folio_rotate_reclaimable(), add it to the oldest generation. - * 4. Everything else falls between 2 & 3 above and is added to the - * second oldest generation if it's considered inactive, or the - * oldest generation otherwise. See lru_gen_is_active(). 
- */ - if (folio_test_active(folio)) - seq = lrugen->max_seq; - else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || - (folio_test_reclaim(folio) && - (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->max_seq - 1; - else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) - seq = lrugen->min_seq[type]; - else - seq = lrugen->min_seq[type] + 1; + seq = lru_gen_folio_seq(lruvec, folio, reclaiming); gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - mask = LRU_GEN_MASK; - /* - * Don't clear PG_workingset here because it can affect PSI accounting - * if the activation is due to workingset refault. - */ - if (folio_test_active(folio)) - mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active); - set_mask_bits(&folio->flags, mask, flags); + set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ @@ -564,9 +562,9 @@ static inline pte_marker copy_pte_marker( * Must be called with pgtable lock held so that no thread will see the none * pte, and if they see it, they'll fault and serialize at the pgtable lock. * - * This function is a no-op if PTE_MARKER_UFFD_WP is not enabled. + * Returns true if an uffd-wp pte was installed, false otherwise. */ -static inline void +static inline bool pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t pteval) { @@ -583,7 +581,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, * with a swap pte. There's no way of leaking the bit. */ if (vma_is_anonymous(vma) || !userfaultfd_wp(vma)) - return; + return false; /* A uffd-wp wr-protected normal pte */ if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval))) @@ -596,10 +594,13 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, if (unlikely(pte_swp_uffd_wp_any(pteval))) arm_uffd_pte = true; - if (unlikely(arm_uffd_pte)) + if (unlikely(arm_uffd_pte)) { set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); + return true; + } #endif + return false; } static inline bool vma_has_recency(struct vm_area_struct *vma) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 14fc1b39c0cf..56d07edd01f9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <linux/seqlock.h> #include <linux/percpu_counter.h> +#include <linux/types.h> #include <asm/mmu.h> @@ -99,6 +100,10 @@ struct page { /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; + struct { + struct llist_node pcp_llist; + unsigned int order; + }; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; @@ -129,8 +134,11 @@ struct page { unsigned long compound_head; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ - /** @pgmap: Points to the hosting device page map. */ - struct dev_pagemap *pgmap; + /* + * The first word is used for compound_head or folio + * pgmap + */ + void *_unused_pgmap_compound_head; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being @@ -283,6 +291,49 @@ typedef struct { unsigned long val; } swp_entry_t; +#if defined(CONFIG_MEMCG) || defined(CONFIG_SLAB_OBJ_EXT) +/* We have some extra room after the refcount in tail pages. 
*/ +#define NR_PAGES_IN_LARGE_FOLIO +#endif + +/* + * On 32bit, we can cut the required metadata in half, because: + * (a) PID_MAX_LIMIT implicitly limits the number of MMs we could ever have, + * so we can limit MM IDs to 15 bit (32767). + * (b) We don't expect folios where even a single complete PTE mapping by + * one MM would exceed 15 bits (order-15). + */ +#ifdef CONFIG_64BIT +typedef int mm_id_mapcount_t; +#define MM_ID_MAPCOUNT_MAX INT_MAX +typedef unsigned int mm_id_t; +#else /* !CONFIG_64BIT */ +typedef short mm_id_mapcount_t; +#define MM_ID_MAPCOUNT_MAX SHRT_MAX +typedef unsigned short mm_id_t; +#endif /* CONFIG_64BIT */ + +/* We implicitly use the dummy ID for init-mm etc. where we never rmap pages. */ +#define MM_ID_DUMMY 0 +#define MM_ID_MIN (MM_ID_DUMMY + 1) + +/* + * We leave the highest bit of each MM id unused, so we can store a flag + * in the highest bit of each folio->_mm_id[]. + */ +#define MM_ID_BITS ((sizeof(mm_id_t) * BITS_PER_BYTE) - 1) +#define MM_ID_MASK ((1U << MM_ID_BITS) - 1) +#define MM_ID_MAX MM_ID_MASK + +/* + * In order to use bit_spin_lock(), which requires an unsigned long, we + * operate on folio->_mm_ids when working on flags. + */ +#define FOLIO_MM_IDS_LOCK_BITNUM MM_ID_BITS +#define FOLIO_MM_IDS_LOCK_BIT BIT(FOLIO_MM_IDS_LOCK_BITNUM) +#define FOLIO_MM_IDS_SHARED_BITNUM (2 * MM_ID_BITS + 1) +#define FOLIO_MM_IDS_SHARED_BIT BIT(FOLIO_MM_IDS_SHARED_BITNUM) + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -292,6 +343,8 @@ typedef struct { * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. + * @share: number of DAX mappings that reference this folio. See + * dax_associate_entry. * @private: Filesystem per-folio data (see folio_attach_private()). * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to @@ -299,13 +352,17 @@ typedef struct { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @pgmap: Metadata for ZONE_DEVICE mappings * @virtual: Virtual address in the kernel direct map. * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_large_mapcount: Do not use directly, call folio_mapcount(). * @_nr_pages_mapped: Do not use outside of rmap and debug code. * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). - * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). + * @_nr_pages: Do not use directly, call folio_nr_pages(). + * @_mm_id: Do not use outside of rmap code. + * @_mm_ids: Do not use outside of rmap code. + * @_mm_id_mapcount: Do not use outside of rmap code. * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. 
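[Editorial note, not part of the patch] The MM ID masks and the folio->_mm_ids flag bits defined in the hunk above lend themselves to a short illustration: the lock bit is meant to be taken with bit_spin_lock(), and each _mm_id[] slot keeps MM_ID_BITS of ID with the top bit reserved as a flag. The sketch below only restates that layout; the example_* helper names are hypothetical, and the fields involved exist only for large folios and are documented as rmap-internal.

#include <linux/bit_spinlock.h>
#include <linux/mm_types.h>

/* Illustrative only: operate on folio->_mm_ids using the bits defined above. */
static inline void example_folio_lock_mm_ids(struct folio *folio)
{
	/* FOLIO_MM_IDS_LOCK_BITNUM is the spinlock bit inside _mm_ids */
	bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids);
}

static inline void example_folio_unlock_mm_ids(struct folio *folio)
{
	bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids);
}

static inline bool example_folio_mm_ids_shared(struct folio *folio)
{
	/* The shared flag sits above both MM ID slots in the same word */
	return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
}

static inline mm_id_t example_folio_first_mm_id(struct folio *folio)
{
	/* Each slot stores MM_ID_BITS of ID; the slot's top bit is a flag */
	return folio->_mm_id[0] & MM_ID_MASK;
}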
@@ -337,9 +394,13 @@ struct folio { /* private: */ }; /* public: */ + struct dev_pagemap *pgmap; }; struct address_space *mapping; - pgoff_t index; + union { + pgoff_t index; + unsigned long share; + }; union { void *private; swp_entry_t swap; @@ -365,14 +426,30 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + union { + struct { /* public: */ - atomic_t _large_mapcount; - atomic_t _entire_mapcount; - atomic_t _nr_pages_mapped; - atomic_t _pincount; + atomic_t _large_mapcount; + atomic_t _nr_pages_mapped; #ifdef CONFIG_64BIT - unsigned int _folio_nr_pages; -#endif + atomic_t _entire_mapcount; + atomic_t _pincount; +#endif /* CONFIG_64BIT */ + mm_id_mapcount_t _mm_id_mapcount[2]; + union { + mm_id_t _mm_id[2]; + unsigned long _mm_ids; + }; + /* private: the union with struct page is transitional */ + }; + unsigned long _usable_1[4]; + }; + atomic_t _mapcount_1; + atomic_t _refcount_1; + /* public: */ +#ifdef NR_PAGES_IN_LARGE_FOLIO + unsigned int _nr_pages; +#endif /* NR_PAGES_IN_LARGE_FOLIO */ /* private: the union with struct page is transitional */ }; struct page __page_1; @@ -382,20 +459,27 @@ struct folio { unsigned long _flags_2; unsigned long _head_2; /* public: */ - void *_hugetlb_subpool; - void *_hugetlb_cgroup; - void *_hugetlb_cgroup_rsvd; - void *_hugetlb_hwpoison; + struct list_head _deferred_list; +#ifndef CONFIG_64BIT + atomic_t _entire_mapcount; + atomic_t _pincount; +#endif /* !CONFIG_64BIT */ /* private: the union with struct page is transitional */ }; + struct page __page_2; + }; + union { struct { - unsigned long _flags_2a; - unsigned long _head_2a; + unsigned long _flags_3; + unsigned long _head_3; /* public: */ - struct list_head _deferred_list; + void *_hugetlb_subpool; + void *_hugetlb_cgroup; + void *_hugetlb_cgroup_rsvd; + void *_hugetlb_hwpoison; /* private: the union with struct page is transitional */ }; - struct page __page_2; + struct page __page_3; }; }; @@ -424,21 +508,29 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); +FOLIO_MATCH(_mapcount, _mapcount_1); +FOLIO_MATCH(_refcount, _refcount_1); #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); -FOLIO_MATCH(flags, _flags_2a); -FOLIO_MATCH(compound_head, _head_2a); +#undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + 3 * sizeof(struct page)) +FOLIO_MATCH(flags, _flags_3); +FOLIO_MATCH(compound_head, _head_3); #undef FOLIO_MATCH /** * struct ptdesc - Memory descriptor for page tables. * @__page_flags: Same as page flags. Powerpc only. * @pt_rcu_head: For freeing page table pages. - * @pt_list: List of used page tables. Used for s390 and x86. + * @pt_list: List of used page tables. Used for s390 gmap shadow pages + * (which are not linked into the user page tables) and x86 + * pgds. * @_pt_pad_1: Padding that aliases with page's compound head. * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. * @__page_mapping: Aliases with page->mapping. Unused for page tables. @@ -572,6 +664,12 @@ static inline void *folio_get_private(struct folio *folio) typedef unsigned long vm_flags_t; /* + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. 
+ */ +typedef struct { unsigned long v; } freeptr_t; + +/* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that * map parts of them. @@ -627,9 +725,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) } #endif -struct vma_lock { - struct rw_semaphore lock; -}; +#define VMA_LOCK_OFFSET 0x40000000 +#define VMA_REF_LIMIT (VMA_LOCK_OFFSET - 1) struct vma_numab_state { /* @@ -675,6 +772,9 @@ struct vma_numab_state { * * Only explicitly marked struct members may be accessed by RCU readers before * getting a stable reference. + * + * WARNING: when adding new members, please update vm_area_init_from() to copy + * them during vm_area_struct content duplication. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -685,9 +785,7 @@ struct vm_area_struct { unsigned long vm_start; unsigned long vm_end; }; -#ifdef CONFIG_PER_VMA_LOCK - struct rcu_head vm_rcu; /* Used for deferred freeing. */ -#endif + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ }; /* @@ -708,18 +806,12 @@ struct vm_area_struct { #ifdef CONFIG_PER_VMA_LOCK /* - * Flag to indicate areas detached from the mm->mm_mt tree. - * Unstable RCU readers are allowed to read this. - */ - bool detached; - - /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) - * - vm_lock->lock (in write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) - * - vm_lock->lock (in read or write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * @@ -727,21 +819,8 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; - /* Unstable RCU readers are allowed to read this. */ - struct vma_lock *vm_lock; + unsigned int vm_lock_seq; #endif - - /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. - * - */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma @@ -761,14 +840,6 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ -#ifdef CONFIG_ANON_VMA_NAME - /* - * For private and shared anonymous mappings, a pointer to a null - * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. - */ - struct anon_vma_name *anon_name; -#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif @@ -781,6 +852,30 @@ struct vm_area_struct { #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif +#ifdef CONFIG_PER_VMA_LOCK + /* Unstable RCU readers are allowed to read this. */ + refcount_t vm_refcnt ____cacheline_aligned_in_smp; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map vmlock_dep_map; +#endif +#endif + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. 
+ * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; } __randomize_layout; @@ -916,12 +1011,16 @@ struct mm_struct { * by mmlist_lock */ #ifdef CONFIG_PER_VMA_LOCK + struct rcuwait vma_writer_wait; /* * This field has lock-like semantics, meaning it is sometimes * accessed with ACQUIRE/RELEASE semantics. * Roughly speaking, incrementing the sequence number is * equivalent to releasing locks on VMAs; reading the sequence * number can be part of taking a read lock on a VMA. + * Incremented every time mmap_lock is write-locked/unlocked. + * Initialized to 0, therefore odd values indicate mmap_lock + * is write-locked and even values that it's released. * * Can be modified under write mmap_lock using RELEASE * semantics. @@ -930,7 +1029,7 @@ struct mm_struct { * Can be read with ACQUIRE semantics if not holding write * mmap_lock. */ - int mm_lock_seq; + seqcount_t mm_lock_seq; #endif @@ -1065,6 +1164,9 @@ struct mm_struct { #endif } lru_gen; #endif /* CONFIG_LRU_GEN_WALKS_MMU */ +#ifdef CONFIG_MM_ID + mm_id_t mm_id; +#endif /* CONFIG_MM_ID */ } __randomize_layout; /* @@ -1402,6 +1504,7 @@ enum tlb_flush_reason { TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, + TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; diff --git a/include/linux/mman.h b/include/linux/mman.h index a842783ffa62..f4c6346a8fcd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -59,8 +59,6 @@ | MAP_HUGE_1GB) extern int sysctl_overcommit_memory; -extern int sysctl_overcommit_ratio; -extern unsigned long sysctl_overcommit_kbytes; extern struct percpu_counter vm_committed_as; #ifdef CONFIG_SMP @@ -157,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index de9dc20b01ba..4706c6769902 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -71,39 +71,62 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK -/* - * Drop all currently-held per-VMA locks. - * This is called from the mmap_lock implementation directly before releasing - * a write-locked mmap_lock (or downgrading it to read-locked). - * This should normally NOT be called manually from other places. - * If you want to call this manually anyway, keep in mind that this will release - * *all* VMA write locks, including ones from further up the stack. 
- */ -static inline void vma_end_write_all(struct mm_struct *mm) + +static inline void mm_lock_seqcount_init(struct mm_struct *mm) +{ + seqcount_init(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) +{ + do_raw_write_seqcount_begin(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_end(struct mm_struct *mm) +{ + ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq); + do_raw_write_seqcount_end(&mm->mm_lock_seq); +} + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) { - mmap_assert_write_locked(mm); /* - * Nobody can concurrently modify mm->mm_lock_seq due to exclusive - * mmap_lock being held. - * We need RELEASE semantics here to ensure that preceding stores into - * the VMA take effect before we unlock it with this store. - * Pairs with ACQUIRE semantics in vma_start_read(). + * Since mmap_lock is a sleeping lock, and waiting for it to become + * unlocked is more or less equivalent with taking it ourselves, don't + * bother with the speculative path if mmap_lock is already write-locked + * and take the slow path, which takes the lock. */ - smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); + return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq); +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return read_seqcount_retry(&mm->mm_lock_seq, seq); } -#else -static inline void vma_end_write_all(struct mm_struct *mm) {} -#endif -static inline void mmap_init_lock(struct mm_struct *mm) +#else /* CONFIG_PER_VMA_LOCK */ + +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) { - init_rwsem(&mm->mmap_lock); + return false; } +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return true; +} + +#endif /* CONFIG_PER_VMA_LOCK */ + static inline void mmap_write_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -111,6 +134,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -120,10 +144,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) __mmap_lock_trace_start_locking(mm, true); ret = down_write_killable(&mm->mmap_lock); + if (!ret) + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, ret == 0); return ret; } +/* + * Drop all currently-held per-VMA locks. + * This is called from the mmap_lock implementation directly before releasing + * a write-locked mmap_lock (or downgrading it to read-locked). + * This should normally NOT be called manually from other places. + * If you want to call this manually anyway, keep in mind that this will release + * *all* VMA write locks, including ones from further up the stack. 
+ */ +static inline void vma_end_write_all(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + mm_lock_seqcount_end(mm); +} + static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 526fce581657..ddcdf23d731c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -329,6 +329,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ +#define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 56972bd78462..01e0f591a20b 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -57,6 +57,7 @@ struct mmc_command { #define MMC_RSP_NONE (0) #define MMC_RSP_R1 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R1B (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY) +#define MMC_RSP_R1B_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE|MMC_RSP_BUSY) #define MMC_RSP_R2 (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC) #define MMC_RSP_R3 (MMC_RSP_PRESENT) #define MMC_RSP_R4 (MMC_RSP_PRESENT) @@ -64,9 +65,6 @@ struct mmc_command { #define MMC_RSP_R6 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R7 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) -/* Can be used by core to poll after switch to MMC HS mode */ -#define MMC_RSP_R1_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE) - #define mmc_resp_type(cmd) ((cmd)->flags & (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC|MMC_RSP_BUSY|MMC_RSP_OPCODE)) /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f166d6611ddb..68f09a955a90 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -590,6 +590,14 @@ static inline struct mmc_host *mmc_from_priv(void *priv) return container_of(priv, struct mmc_host, private); } +#ifdef CONFIG_MMC_CRYPTO +static inline struct mmc_host * +mmc_from_crypto_profile(struct blk_crypto_profile *profile) +{ + return container_of(profile, struct mmc_host, crypto_profile); +} +#endif + #define mmc_host_is_spi(host) ((host)->caps & MMC_CAP_SPI) #define mmc_dev(x) ((x)->parent) diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 274a2767ea49..1ed7b0d1e4f9 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -22,7 +22,6 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int idx, unsigned int debounce); int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); -void mmc_gpio_set_cd_isr(struct mmc_host *host, irq_handler_t isr); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); bool mmc_can_gpio_cd(struct mmc_host *host); diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index d7cb1e5ecbda..a0a3894900ed 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -9,10 +9,12 @@ struct page; struct vm_area_struct; struct mm_struct; struct vma_iterator; +struct vma_merge_struct; void dump_page(const struct page *page, const char *reason); void dump_vma(const struct 
vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); +void dump_vmg(const struct vma_merge_struct *vmg, const char *reason); void vma_iter_dump_tree(const struct vma_iterator *vmi); #ifdef CONFIG_DEBUG_VM @@ -87,6 +89,15 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_VMG(cond, vmg) ({ \ + int __ret_warn = !!(cond); \ + \ + if (unlikely(__ret_warn)) { \ + dump_vmg(vmg, "VM_WARN_ON_VMG(" __stringify(cond)")"); \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn); \ +}) #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) @@ -104,9 +115,10 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); #define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_MM(cond, mm) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_VMG(cond, vmg) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) -#endif +#endif /* CONFIG_DEBUG_VM */ #ifdef CONFIG_DEBUG_VM_IRQSOFF #define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled()) diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index bbaec80c78c5..ac01dc4eb2ce 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -24,6 +24,7 @@ static inline void leave_mm(void) { } #ifndef task_cpu_possible_mask # define task_cpu_possible_mask(p) cpu_possible_mask # define task_cpu_possible(cpu, p) true +# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK) #else # define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p)) #endif diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index e2dd57ca368b..bc2402a45741 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -43,10 +43,10 @@ struct mmu_interval_notifier; * a device driver to possibly ignore the invalidation if the * owner field matches the driver's device private pgmap owner. * - * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no - * longer have exclusive access to the page. When sent during creation of an - * exclusive range the owner will be initialised to the value provided by the - * caller of make_device_exclusive_range(), otherwise the owner will be NULL. + * @MMU_NOTIFY_EXCLUSIVE: conversion of a page table entry to device-exclusive. + * The owner is initialized to the value provided by the caller of + * make_device_exclusive(), such that this caller can filter out these + * events. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b36124145a16..6ccec1bf2896 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -138,6 +138,7 @@ enum numa_stat_item { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, + NR_FREE_PAGES_BLOCKS, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, @@ -220,9 +221,11 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, + PGDEMOTE_PROACTIVE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif + NR_BALLOON_PAGES, NR_VM_NODE_STAT_ITEMS }; @@ -332,66 +335,88 @@ enum lruvec_flags { #endif /* !__GENERATING_BOUNDS_H */ /* - * Evictable pages are divided into multiple generations. 
The youngest and the + * Evictable folios are divided into multiple generations. The youngest and the * oldest generation numbers, max_seq and min_seq, are monotonically increasing. * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while - * a page is on one of lrugen->folios[]. Otherwise it stores 0. + * a folio is on one of lrugen->folios[]. Otherwise it stores 0. * - * A page is added to the youngest generation on faulting. The aging needs to - * check the accessed bit at least twice before handing this page over to the - * eviction. The first check takes care of the accessed bit set on the initial - * fault; the second check makes sure this page hasn't been used since then. - * This process, AKA second chance, requires a minimum of two generations, - * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive - * LRU, e.g., /proc/vmstat, these two generations are considered active; the - * rest of generations, if they exist, are considered inactive. See - * lru_gen_is_active(). + * After a folio is faulted in, the aging needs to check the accessed bit at + * least twice before handing this folio over to the eviction. The first check + * clears the accessed bit from the initial fault; the second check makes sure + * this folio hasn't been used since then. This process, AKA second chance, + * requires a minimum of two generations, hence MIN_NR_GENS. And to maintain ABI + * compatibility with the active/inactive LRU, e.g., /proc/vmstat, these two + * generations are considered active; the rest of generations, if they exist, + * are considered inactive. See lru_gen_is_active(). * - * PG_active is always cleared while a page is on one of lrugen->folios[] so - * that the aging needs not to worry about it. And it's set again when a page - * considered active is isolated for non-reclaiming purposes, e.g., migration. - * See lru_gen_add_folio() and lru_gen_del_folio(). + * PG_active is always cleared while a folio is on one of lrugen->folios[] so + * that the sliding window needs not to worry about it. And it's set again when + * a folio considered active is isolated for non-reclaiming purposes, e.g., + * migration. See lru_gen_add_folio() and lru_gen_del_folio(). * * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits - * in folio->flags. + * in folio->flags, masked by LRU_GEN_MASK. */ #define MIN_NR_GENS 2U #define MAX_NR_GENS 4U /* - * Each generation is divided into multiple tiers. A page accessed N times - * through file descriptors is in tier order_base_2(N). A page in the first tier - * (N=0,1) is marked by PG_referenced unless it was faulted in through page - * tables or read ahead. A page in any other tier (N>1) is marked by - * PG_referenced and PG_workingset. This implies a minimum of two tiers is - * supported without using additional bits in folio->flags. + * Each generation is divided into multiple tiers. A folio accessed N times + * through file descriptors is in tier order_base_2(N). A folio in the first + * tier (N=0,1) is marked by PG_referenced unless it was faulted in through page + * tables or read ahead. A folio in the last tier (MAX_NR_TIERS-1) is marked by + * PG_workingset. 
A folio in any other tier (1<N<5) between the first and last + * is marked by additional bits of LRU_REFS_WIDTH in folio->flags. * * In contrast to moving across generations which requires the LRU lock, moving * across tiers only involves atomic operations on folio->flags and therefore * has a negligible cost in the buffered access path. In the eviction path, - * comparisons of refaulted/(evicted+protected) from the first tier and the - * rest infer whether pages accessed multiple times through file descriptors - * are statistically hot and thus worth protecting. + * comparisons of refaulted/(evicted+protected) from the first tier and the rest + * infer whether folios accessed multiple times through file descriptors are + * statistically hot and thus worth protecting. * * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in - * folio->flags. + * folio->flags, masked by LRU_REFS_MASK. */ #define MAX_NR_TIERS 4U #ifndef __GENERATING_BOUNDS_H -struct lruvec; -struct page_vma_mapped_walk; - #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) +/* + * For folios accessed multiple times through file descriptors, + * lru_gen_inc_refs() sets additional bits of LRU_REFS_WIDTH in folio->flags + * after PG_referenced, then PG_workingset after LRU_REFS_WIDTH. After all its + * bits are set, i.e., LRU_REFS_FLAGS|BIT(PG_workingset), a folio is lazily + * promoted into the second oldest generation in the eviction path. And when + * folio_inc_gen() does that, it clears LRU_REFS_FLAGS so that + * lru_gen_inc_refs() can start over. Note that for this case, LRU_REFS_MASK is + * only valid when PG_referenced is set. + * + * For folios accessed multiple times through page tables, folio_update_gen() + * from a page table walk or lru_gen_set_refs() from a rmap walk sets + * PG_referenced after the accessed bit is cleared for the first time. + * Thereafter, those two paths set PG_workingset and promote folios to the + * youngest generation. Like folio_inc_gen(), folio_update_gen() also clears + * PG_referenced. Note that for this case, LRU_REFS_MASK is not used. + * + * For both cases above, after PG_workingset is set on a folio, it remains until + * this folio is either reclaimed, or "deactivated" by lru_gen_clear_refs(). It + * can be set again if lru_gen_test_recent() returns true upon a refault. + */ +#define LRU_REFS_FLAGS (LRU_REFS_MASK | BIT(PG_referenced)) + +struct lruvec; +struct page_vma_mapped_walk; + #ifdef CONFIG_LRU_GEN enum { @@ -406,8 +431,6 @@ enum { NR_LRU_GEN_CAPS }; -#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) - #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) @@ -421,12 +444,11 @@ enum { /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are - * stored in min_seq[] separately for anon and file types as clean file pages - * can be evicted regardless of swap constraints. - * - * Normally anon and file min_seq are in sync. But if swapping is constrained, - * e.g., out of swap space, file min_seq is allowed to advance and leave anon - * min_seq behind. + * stored in min_seq[] separately for anon and file types so that they can be + * incremented independently. 
Ideally min_seq[] are kept in sync when both anon + * and file types are evictable. However, to adapt to situations like extreme + * swappiness, they are allowed to be out of sync by at most + * MAX_NR_GENS-MIN_NR_GENS-1. * * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. @@ -446,8 +468,8 @@ struct lru_gen_folio { unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; /* the exponential moving average of evicted+protected */ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; - /* the first tier doesn't need protection, hence the minus one */ - unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can only be modified under the LRU lock */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* can be modified without holding the LRU lock */ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; @@ -498,7 +520,7 @@ struct lru_gen_mm_walk { int mm_stats[NR_MM_STATS]; /* total batched items */ int batched; - bool can_swap; + int swappiness; bool force_scan; }; @@ -945,6 +967,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ @@ -953,6 +978,9 @@ struct zone { /* Primarily protects free_area */ spinlock_t lock; + /* Pages to be freed when next trylock succeeds */ + struct llist_head trylock_free_pages; + /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); @@ -1139,6 +1167,12 @@ static inline bool is_zone_device_page(const struct page *page) return page_zonenum(page) == ZONE_DEVICE; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + return page_folio(page)->pgmap; +} + /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different @@ -1154,7 +1188,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, return false; if (!is_zone_device_page(a)) return true; - return a->pgmap == b->pgmap; + return page_pgmap(a) == page_pgmap(b); } extern void memmap_init_zone_device(struct zone *, unsigned long, @@ -1169,6 +1203,10 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, { return true; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + return NULL; +} #endif static inline bool folio_is_zone_device(const struct folio *folio) @@ -1464,8 +1502,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. 
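[Editorial note, not part of the patch] With the pgmap pointer moved to the folio, the mmzone.h hunk above routes accesses through the new page_pgmap() accessor instead of touching page->pgmap directly. The sketch below mirrors the segment-merging check already shown in that hunk; it is illustrative only and the example_ name is made up.

#include <linux/mmzone.h>

/*
 * Illustrative only: decide whether two consecutive pages could share a
 * bio_vec/sgl segment, mirroring zone_device_pages_have_same_pgmap().
 */
static bool example_same_device_segment(const struct page *a, const struct page *b)
{
	/* Mixing ZONE_DEVICE and normal pages in one segment is not allowed */
	if (is_zone_device_page(a) != is_zone_device_page(b))
		return false;
	if (!is_zone_device_page(a))
		return true;

	/* ZONE_DEVICE pages must come from the same dev_pagemap */
	return page_pgmap(a) == page_pgmap(b);
}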
@@ -1915,6 +1951,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT + SECTION_IS_VMEMMAP_PREINIT_BIT, +#endif SECTION_MAP_LAST_BIT, }; @@ -1925,6 +1964,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +#define SECTION_IS_VMEMMAP_PREINIT BIT(SECTION_IS_VMEMMAP_PREINIT_BIT) +#endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT @@ -1979,6 +2021,30 @@ static inline int online_device_section(struct mem_section *section) } #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return (section && + (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT)); +} + +void sparse_vmemmap_init_nid_early(int nid); +void sparse_vmemmap_init_nid_late(int nid); + +#else +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return 0; +} +static inline void sparse_vmemmap_init_nid_early(int nid) +{ +} + +static inline void sparse_vmemmap_init_nid_late(int nid) +{ +} +#endif + static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); @@ -2016,6 +2082,9 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) } #endif +void sparse_init_early_section(int nid, struct page *map, unsigned long pnum, + unsigned long flags); + #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN @@ -2078,6 +2147,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) return -1; } +#define for_each_present_section_nr(start, section_nr) \ + for (section_nr = next_present_section_nr(start - 1); \ + section_nr != -1; \ + section_nr = next_present_section_nr(section_nr)) + /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate @@ -2097,6 +2171,8 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) +#define sparse_vmemmap_init_nid_early(_nid, _use) do {} while (0) +#define sparse_vmemmap_init_nid_late(_nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index b1b219bc3422..e71a6070a8f8 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); +static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) +{ + return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; +} + #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4338b1b4ac44..bd7e60c0b72f 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -692,6 +692,7 @@ struct x86_cpu_id { __u16 feature; /* bit index */ /* Solely for kernel-internal use: DO NOT EXPORT to userspace! 
*/ __u16 flags; + __u8 type; kernel_ulong_t driver_data; }; @@ -700,7 +701,10 @@ struct x86_cpu_id { #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 #define X86_STEPPING_ANY 0 +#define X86_STEP_MIN 0 +#define X86_STEP_MAX 0xf #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +#define X86_CPU_TYPE_ANY 0 /* * Generic table type for matching CPU features. diff --git a/include/linux/module.h b/include/linux/module.h index b3a643435357..b3329110d668 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -52,9 +52,9 @@ struct module_kobject { struct module_attribute { struct attribute attr; - ssize_t (*show)(struct module_attribute *, struct module_kobject *, + ssize_t (*show)(const struct module_attribute *, struct module_kobject *, char *); - ssize_t (*store)(struct module_attribute *, struct module_kobject *, + ssize_t (*store)(const struct module_attribute *, struct module_kobject *, const char *, size_t count); void (*setup)(struct module *, const char *); int (*test)(struct module *); @@ -67,10 +67,10 @@ struct module_version_attribute { const char *version; }; -extern ssize_t __modver_version_show(struct module_attribute *, +extern ssize_t __modver_version_show(const struct module_attribute *, struct module_kobject *, char *); -extern struct module_attribute module_uevent; +extern const struct module_attribute module_uevent; /* These are either module local, or the kernel's dummy ones. */ extern int init_module(void); @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) @@ -275,7 +277,7 @@ extern typeof(name) __mod_device_table__##type##__##name \ #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ - static struct module_version_attribute __modver_attr \ + static const struct module_version_attribute __modver_attr \ __used __section("__modver") \ __aligned(__alignof__(struct module_version_attribute)) \ = { \ @@ -306,7 +308,10 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) /* modules using other modules: kdb wants to see this. */ struct module_use { @@ -367,7 +372,6 @@ enum mod_mem_type { struct module_memory { void *base; - void *rw_copy; bool is_rox; unsigned int size; @@ -430,7 +434,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const s32 *crcs; + const u32 *crcs; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -448,7 +452,7 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const s32 *gpl_crcs; + const u32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG @@ -663,7 +667,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod) return within_module_init(addr, mod) || within_module_core(addr, mod); } -/* Search for module by name: must be in a RCU-sched critical section. */ +/* Search for module by name: must be in a RCU critical section. 
*/ struct module *find_module(const char *name); extern void __noreturn __module_put_and_kthread_exit(struct module *mod, @@ -769,15 +773,7 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); -void *__module_writable_address(struct module *mod, void *loc); - -static inline void *module_writable_address(struct module *mod, void *loc) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod || - mod->state != MODULE_STATE_UNFORMED) - return loc; - return __module_writable_address(mod, loc); -} +void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data); #else /* !CONFIG_MODULES... */ @@ -887,9 +883,8 @@ static inline bool module_is_coming(struct module *mod) return false; } -static inline void *module_writable_address(struct module *mod, void *loc) +static inline void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data) { - return loc; } #endif /* CONFIG_MODULES */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1f5507ba5a12..e395461d59e5 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *mod); - #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/include/linux/mount.h b/include/linux/mount.h index 04213d8ef837..1a3136e53eaa 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,52 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? 
*/ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. + */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | + MNT_LOCKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ @@ -75,7 +79,7 @@ struct vfsmount { static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ - return smp_load_acquire(&mnt->mnt_idmap); + return READ_ONCE(mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt); @@ -113,7 +117,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *); extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); -extern long do_mount(const char *, const char __user *, +int do_mount(const char *, const char __user *, const char *, unsigned long, void *); extern struct vfsmount *collect_mounts(const struct path *); extern void drop_collected_mounts(struct vfsmount *); diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b55..0075f6e5c3da 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, diff --git a/include/linux/msi.h b/include/linux/msi.h index b10093c4d00e..86e42742fd0f 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -73,7 +73,6 @@ struct msi_msg { }; }; -extern int pci_msi_ignore_mask; /* Helper functions */ struct msi_desc; struct pci_dev; @@ -166,6 +165,10 @@ struct msi_desc_data { * @dev: Pointer to the device which uses this descriptor * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor + * @iommu_msi_iova: Optional shifted IOVA from the IOMMU to override the msi_addr. + * Only used if iommu_msi_shift != 0 + * @iommu_msi_shift: Indicates how many bits of the original address should be + * preserved when using iommu_msi_iova. 
* @sysfs_attr: Pointer to sysfs device attribute * * @write_msi_msg: Callback that may be called when the MSI message @@ -184,7 +187,8 @@ struct msi_desc { struct msi_msg msg; struct irq_affinity_desc *affinity; #ifdef CONFIG_IRQ_MSI_IOMMU - const void *iommu_cookie; + u64 iommu_msi_iova : 58; + u64 iommu_msi_shift : 6; #endif #ifdef CONFIG_SYSFS struct device_attribute *sysfs_attrs; @@ -285,28 +289,42 @@ struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, #define msi_desc_to_dev(desc) ((desc)->dev) -#ifdef CONFIG_IRQ_MSI_IOMMU -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -{ - return desc->iommu_cookie; -} - -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, - const void *iommu_cookie) -{ - desc->iommu_cookie = iommu_cookie; -} -#else -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +static inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc, u64 msi_iova, + unsigned int msi_shift) { - return NULL; +#ifdef CONFIG_IRQ_MSI_IOMMU + desc->iommu_msi_iova = msi_iova >> msi_shift; + desc->iommu_msi_shift = msi_shift; +#endif } -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, - const void *iommu_cookie) +/** + * msi_msg_set_addr() - Set MSI address in an MSI message + * + * @desc: MSI descriptor that may carry an IOVA base address for MSI via @iommu_msi_iova/shift + * @msg: Target MSI message to set its address_hi and address_lo + * @msi_addr: Physical address to set the MSI message + * + * Notes: + * - Override @msi_addr using the IOVA base address in the @desc if @iommu_msi_shift is set + * - Otherwise, simply set @msi_addr to @msg + */ +static inline void msi_msg_set_addr(struct msi_desc *desc, struct msi_msg *msg, + phys_addr_t msi_addr) { -} +#ifdef CONFIG_IRQ_MSI_IOMMU + if (desc->iommu_msi_shift) { + u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift; + + msg->address_hi = upper_32_bits(msi_iova); + msg->address_lo = lower_32_bits(msi_iova) | + (msi_addr & ((1 << desc->iommu_msi_shift) - 1)); + return; + } #endif + msg->address_hi = upper_32_bits(msi_addr); + msg->address_lo = lower_32_bits(msi_addr); +} int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, struct msi_desc *init_desc); @@ -556,6 +574,16 @@ enum { MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), /* PCI MSIs cannot be steered separately to CPU cores */ MSI_FLAG_NO_AFFINITY = (1 << 21), + /* Inhibit usage of entry masking */ + MSI_FLAG_NO_MASK = (1 << 22), +}; + +/* + * Flags for msi_parent_ops::chip_flags + */ +enum { + MSI_CHIP_FLAG_SET_EOI = (1 << 0), + MSI_CHIP_FLAG_SET_ACK = (1 << 1), }; /** @@ -563,6 +591,8 @@ enum { * * @supported_flags: Required: The supported MSI flags of the parent domain * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @chip_flags: Optional: Select MSI chip callbacks to update with defaults + * in msi_lib_init_dev_msi_info(). 
* @bus_select_token: Optional: The bus token of the real parent domain for * irq_domain::select() * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for @@ -575,6 +605,7 @@ enum { struct msi_parent_ops { u32 supported_flags; u32 required_flags; + u32 chip_flags; u32 bus_select_token; u32 bus_select_mask; const char *prefix; diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h new file mode 100644 index 000000000000..e8462deda6db --- /dev/null +++ b/include/linux/mtd/nand-qpic-common.h @@ -0,0 +1,487 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * QCOM QPIC common APIs header file + * + * Copyright (c) 2023 Qualcomm Inc. + * Authors: Md sadre Alam <quic_mdalam@quicinc.com> + * + */ +#ifndef __MTD_NAND_QPIC_COMMON_H__ +#define __MTD_NAND_QPIC_COMMON_H__ + +/* NANDc reg offsets */ +#define NAND_FLASH_CMD 0x00 +#define NAND_ADDR0 0x04 +#define NAND_ADDR1 0x08 +#define NAND_FLASH_CHIP_SELECT 0x0c +#define NAND_EXEC_CMD 0x10 +#define NAND_FLASH_STATUS 0x14 +#define NAND_BUFFER_STATUS 0x18 +#define NAND_DEV0_CFG0 0x20 +#define NAND_DEV0_CFG1 0x24 +#define NAND_DEV0_ECC_CFG 0x28 +#define NAND_AUTO_STATUS_EN 0x2c +#define NAND_DEV1_CFG0 0x30 +#define NAND_DEV1_CFG1 0x34 +#define NAND_READ_ID 0x40 +#define NAND_READ_STATUS 0x44 +#define NAND_DEV_CMD0 0xa0 +#define NAND_DEV_CMD1 0xa4 +#define NAND_DEV_CMD2 0xa8 +#define NAND_DEV_CMD_VLD 0xac +#define SFLASHC_BURST_CFG 0xe0 +#define NAND_ERASED_CW_DETECT_CFG 0xe8 +#define NAND_ERASED_CW_DETECT_STATUS 0xec +#define NAND_EBI2_ECC_BUF_CFG 0xf0 +#define FLASH_BUF_ACC 0x100 + +#define NAND_CTRL 0xf00 +#define NAND_VERSION 0xf08 +#define NAND_READ_LOCATION_0 0xf20 +#define NAND_READ_LOCATION_1 0xf24 +#define NAND_READ_LOCATION_2 0xf28 +#define NAND_READ_LOCATION_3 0xf2c +#define NAND_READ_LOCATION_LAST_CW_0 0xf40 +#define NAND_READ_LOCATION_LAST_CW_1 0xf44 +#define NAND_READ_LOCATION_LAST_CW_2 0xf48 +#define NAND_READ_LOCATION_LAST_CW_3 0xf4c + +/* dummy register offsets, used by qcom_write_reg_dma */ +#define NAND_DEV_CMD1_RESTORE 0xdead +#define NAND_DEV_CMD_VLD_RESTORE 0xbeef + +/* NAND_FLASH_CMD bits */ +#define PAGE_ACC BIT(4) +#define LAST_PAGE BIT(5) + +/* NAND_FLASH_CHIP_SELECT bits */ +#define NAND_DEV_SEL 0 +#define DM_EN BIT(2) + +/* NAND_FLASH_STATUS bits */ +#define FS_OP_ERR BIT(4) +#define FS_READY_BSY_N BIT(5) +#define FS_MPU_ERR BIT(8) +#define FS_DEVICE_STS_ERR BIT(16) +#define FS_DEVICE_WP BIT(23) + +/* NAND_BUFFER_STATUS bits */ +#define BS_UNCORRECTABLE_BIT BIT(8) +#define BS_CORRECTABLE_ERR_MSK 0x1f + +/* NAND_DEVn_CFG0 bits */ +#define DISABLE_STATUS_AFTER_WRITE BIT(4) +#define CW_PER_PAGE 6 +#define CW_PER_PAGE_MASK GENMASK(8, 6) +#define UD_SIZE_BYTES 9 +#define UD_SIZE_BYTES_MASK GENMASK(18, 9) +#define ECC_PARITY_SIZE_BYTES_RS GENMASK(22, 19) +#define SPARE_SIZE_BYTES 23 +#define SPARE_SIZE_BYTES_MASK GENMASK(26, 23) +#define NUM_ADDR_CYCLES 27 +#define NUM_ADDR_CYCLES_MASK GENMASK(29, 27) +#define STATUS_BFR_READ BIT(30) +#define SET_RD_MODE_AFTER_STATUS BIT(31) + +/* NAND_DEVn_CFG0 bits */ +#define DEV0_CFG1_ECC_DISABLE BIT(0) +#define WIDE_FLASH BIT(1) +#define NAND_RECOVERY_CYCLES 2 +#define NAND_RECOVERY_CYCLES_MASK GENMASK(4, 2) +#define CS_ACTIVE_BSY BIT(5) +#define BAD_BLOCK_BYTE_NUM 6 +#define BAD_BLOCK_BYTE_NUM_MASK GENMASK(15, 6) +#define BAD_BLOCK_IN_SPARE_AREA BIT(16) +#define WR_RD_BSY_GAP 17 +#define WR_RD_BSY_GAP_MASK GENMASK(22, 17) +#define ENABLE_BCH_ECC BIT(27) + +/* NAND_DEV0_ECC_CFG bits */ +#define ECC_CFG_ECC_DISABLE BIT(0) +#define ECC_SW_RESET BIT(1) 
+#define ECC_MODE 4 +#define ECC_MODE_MASK GENMASK(5, 4) +#define ECC_PARITY_SIZE_BYTES_BCH 8 +#define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) +#define ECC_NUM_DATA_BYTES 16 +#define ECC_NUM_DATA_BYTES_MASK GENMASK(25, 16) +#define ECC_FORCE_CLK_OPEN BIT(30) + +/* NAND_DEV_CMD1 bits */ +#define READ_ADDR_MASK GENMASK(7, 0) + +/* NAND_DEV_CMD_VLD bits */ +#define READ_START_VLD BIT(0) +#define READ_STOP_VLD BIT(1) +#define WRITE_START_VLD BIT(2) +#define ERASE_START_VLD BIT(3) +#define SEQ_READ_START_VLD BIT(4) + +/* NAND_EBI2_ECC_BUF_CFG bits */ +#define NUM_STEPS 0 +#define NUM_STEPS_MASK GENMASK(9, 0) + +/* NAND_ERASED_CW_DETECT_CFG bits */ +#define ERASED_CW_ECC_MASK 1 +#define AUTO_DETECT_RES 0 +#define MASK_ECC BIT(ERASED_CW_ECC_MASK) +#define RESET_ERASED_DET BIT(AUTO_DETECT_RES) +#define ACTIVE_ERASED_DET (0 << AUTO_DETECT_RES) +#define CLR_ERASED_PAGE_DET (RESET_ERASED_DET | MASK_ECC) +#define SET_ERASED_PAGE_DET (ACTIVE_ERASED_DET | MASK_ECC) + +/* NAND_ERASED_CW_DETECT_STATUS bits */ +#define PAGE_ALL_ERASED BIT(7) +#define CODEWORD_ALL_ERASED BIT(6) +#define PAGE_ERASED BIT(5) +#define CODEWORD_ERASED BIT(4) +#define ERASED_PAGE (PAGE_ALL_ERASED | PAGE_ERASED) +#define ERASED_CW (CODEWORD_ALL_ERASED | CODEWORD_ERASED) + +/* NAND_READ_LOCATION_n bits */ +#define READ_LOCATION_OFFSET 0 +#define READ_LOCATION_OFFSET_MASK GENMASK(9, 0) +#define READ_LOCATION_SIZE 16 +#define READ_LOCATION_SIZE_MASK GENMASK(25, 16) +#define READ_LOCATION_LAST 31 +#define READ_LOCATION_LAST_MASK BIT(31) + +/* Version Mask */ +#define NAND_VERSION_MAJOR_MASK 0xf0000000 +#define NAND_VERSION_MAJOR_SHIFT 28 +#define NAND_VERSION_MINOR_MASK 0x0fff0000 +#define NAND_VERSION_MINOR_SHIFT 16 + +/* NAND OP_CMDs */ +#define OP_PAGE_READ 0x2 +#define OP_PAGE_READ_WITH_ECC 0x3 +#define OP_PAGE_READ_WITH_ECC_SPARE 0x4 +#define OP_PAGE_READ_ONFI_READ 0x5 +#define OP_PROGRAM_PAGE 0x6 +#define OP_PAGE_PROGRAM_WITH_ECC 0x7 +#define OP_PROGRAM_PAGE_SPARE 0x9 +#define OP_BLOCK_ERASE 0xa +#define OP_CHECK_STATUS 0xc +#define OP_FETCH_ID 0xb +#define OP_RESET_DEVICE 0xd + +/* Default Value for NAND_DEV_CMD_VLD */ +#define NAND_DEV_CMD_VLD_VAL (READ_START_VLD | WRITE_START_VLD | \ + ERASE_START_VLD | SEQ_READ_START_VLD) + +/* NAND_CTRL bits */ +#define BAM_MODE_EN BIT(0) + +/* + * the NAND controller performs reads/writes with ECC in 516 byte chunks. + * the driver calls the chunks 'step' or 'codeword' interchangeably + */ +#define NANDC_STEP_SIZE 512 + +/* + * the largest page size we support is 8K, this will have 16 steps/codewords + * of 512 bytes each + */ +#define MAX_NUM_STEPS (SZ_8K / NANDC_STEP_SIZE) + +/* we read at most 3 registers per codeword scan */ +#define MAX_REG_RD (3 * MAX_NUM_STEPS) + +/* ECC modes supported by the controller */ +#define ECC_NONE BIT(0) +#define ECC_RS_4BIT BIT(1) +#define ECC_BCH_4BIT BIT(2) +#define ECC_BCH_8BIT BIT(3) + +/* + * Returns the actual register address for all NAND_DEV_ registers + * (i.e. NAND_DEV_CMD0, NAND_DEV_CMD1, NAND_DEV_CMD2 and NAND_DEV_CMD_VLD) + */ +#define dev_cmd_reg_addr(nandc, reg) ((nandc)->props->dev_cmd_reg_start + (reg)) + +/* Returns the dma address for reg read buffer */ +#define reg_buf_dma_addr(chip, vaddr) \ + ((chip)->reg_read_dma + \ + ((u8 *)(vaddr) - (u8 *)(chip)->reg_read_buf)) + +#define QPIC_PER_CW_CMD_ELEMENTS 32 +#define QPIC_PER_CW_CMD_SGL 32 +#define QPIC_PER_CW_DATA_SGL 8 + +#define QPIC_NAND_COMPLETION_TIMEOUT msecs_to_jiffies(2000) + +/* + * Flags used in DMA descriptor preparation helper functions + * (i.e. 
qcom_read_reg_dma/qcom_write_reg_dma/qcom_read_data_dma/qcom_write_data_dma) + */ +/* Don't set the EOT in current tx BAM sgl */ +#define NAND_BAM_NO_EOT BIT(0) +/* Set the NWD flag in current BAM sgl */ +#define NAND_BAM_NWD BIT(1) +/* Finish writing in the current BAM sgl and start writing in another BAM sgl */ +#define NAND_BAM_NEXT_SGL BIT(2) +/* + * Erased codeword status is being used two times in single transfer so this + * flag will determine the current value of erased codeword status register + */ +#define NAND_ERASED_CW_SET BIT(4) + +#define MAX_ADDRESS_CYCLE 5 + +/* + * This data type corresponds to the BAM transaction which will be used for all + * NAND transfers. + * @bam_ce - the array of BAM command elements + * @cmd_sgl - sgl for NAND BAM command pipe + * @data_sgl - sgl for NAND BAM consumer/producer pipe + * @last_data_desc - last DMA desc in data channel (tx/rx). + * @last_cmd_desc - last DMA desc in command channel. + * @txn_done - completion for NAND transfer. + * @bam_ce_pos - the index in bam_ce which is available for next sgl + * @bam_ce_start - the index in bam_ce which marks the start position ce + * for current sgl. It will be used for size calculation + * for current sgl + * @cmd_sgl_pos - current index in command sgl. + * @cmd_sgl_start - start index in command sgl. + * @tx_sgl_pos - current index in data sgl for tx. + * @tx_sgl_start - start index in data sgl for tx. + * @rx_sgl_pos - current index in data sgl for rx. + * @rx_sgl_start - start index in data sgl for rx. + */ +struct bam_transaction { + struct bam_cmd_element *bam_ce; + struct scatterlist *cmd_sgl; + struct scatterlist *data_sgl; + struct dma_async_tx_descriptor *last_data_desc; + struct dma_async_tx_descriptor *last_cmd_desc; + struct completion txn_done; + struct_group(bam_positions, + u32 bam_ce_pos; + u32 bam_ce_start; + u32 cmd_sgl_pos; + u32 cmd_sgl_start; + u32 tx_sgl_pos; + u32 tx_sgl_start; + u32 rx_sgl_pos; + u32 rx_sgl_start; + + ); +}; + +/* + * This data type corresponds to the nand dma descriptor + * @dma_desc - low level DMA engine descriptor + * @list - list for desc_info + * + * @adm_sgl - sgl which will be used for single sgl dma descriptor. Only used by + * ADM + * @bam_sgl - sgl which will be used for dma descriptor. Only used by BAM + * @sgl_cnt - number of SGL in bam_sgl. Only used by BAM + * @dir - DMA transfer direction + */ +struct desc_info { + struct dma_async_tx_descriptor *dma_desc; + struct list_head node; + + union { + struct scatterlist adm_sgl; + struct { + struct scatterlist *bam_sgl; + int sgl_cnt; + }; + }; + enum dma_data_direction dir; +}; + +/* + * holds the current register values that we want to write. acts as a contiguous + * chunk of memory which we use to write the controller registers through DMA. 
+ */ +struct nandc_regs { + __le32 cmd; + __le32 addr0; + __le32 addr1; + __le32 chip_sel; + __le32 exec; + + __le32 cfg0; + __le32 cfg1; + __le32 ecc_bch_cfg; + + __le32 clrflashstatus; + __le32 clrreadstatus; + + __le32 cmd1; + __le32 vld; + + __le32 orig_cmd1; + __le32 orig_vld; + + __le32 ecc_buf_cfg; + __le32 read_location0; + __le32 read_location1; + __le32 read_location2; + __le32 read_location3; + __le32 read_location_last0; + __le32 read_location_last1; + __le32 read_location_last2; + __le32 read_location_last3; + __le32 spi_cfg; + __le32 num_addr_cycle; + __le32 busy_wait_cnt; + __le32 flash_feature; + + __le32 erased_cw_detect_cfg_clr; + __le32 erased_cw_detect_cfg_set; +}; + +/* + * NAND controller data struct + * + * @dev: parent device + * + * @base: MMIO base + * + * @core_clk: controller clock + * @aon_clk: another controller clock + * @iomacro_clk: io macro clock + * + * @regs: a contiguous chunk of memory for DMA register + * writes. contains the register values to be + * written to controller + * + * @props: properties of current NAND controller, + * initialized via DT match data + * + * @controller: base controller structure + * @qspi: qpic spi structure + * @host_list: list containing all the chips attached to the + * controller + * + * @chan: dma channel + * @cmd_crci: ADM DMA CRCI for command flow control + * @data_crci: ADM DMA CRCI for data flow control + * + * @desc_list: DMA descriptor list (list of desc_infos) + * + * @data_buffer: our local DMA buffer for page read/writes, + * used when we can't use the buffer provided + * by upper layers directly + * @reg_read_buf: local buffer for reading back registers via DMA + * + * @base_phys: physical base address of controller registers + * @base_dma: dma base address of controller registers + * @reg_read_dma: contains dma address for register read buffer + * + * @buf_size/count/start: markers for chip->legacy.read_buf/write_buf + * functions + * @max_cwperpage: maximum QPIC codewords required. calculated + * from all connected NAND devices pagesize + * + * @reg_read_pos: marker for data read in reg_read_buf + * + * @cmd1/vld: some fixed controller register values + * + * @exec_opwrite: flag to select correct number of code word + * while reading status + */ +struct qcom_nand_controller { + struct device *dev; + + void __iomem *base; + + struct clk *core_clk; + struct clk *aon_clk; + + struct nandc_regs *regs; + struct bam_transaction *bam_txn; + + const struct qcom_nandc_props *props; + + struct nand_controller *controller; + struct qpic_spi_nand *qspi; + struct list_head host_list; + + union { + /* will be used only by QPIC for BAM DMA */ + struct { + struct dma_chan *tx_chan; + struct dma_chan *rx_chan; + struct dma_chan *cmd_chan; + }; + + /* will be used only by EBI2 for ADM DMA */ + struct { + struct dma_chan *chan; + unsigned int cmd_crci; + unsigned int data_crci; + }; + }; + + struct list_head desc_list; + + u8 *data_buffer; + __le32 *reg_read_buf; + + phys_addr_t base_phys; + dma_addr_t base_dma; + dma_addr_t reg_read_dma; + + int buf_size; + int buf_count; + int buf_start; + unsigned int max_cwperpage; + + int reg_read_pos; + + u32 cmd1, vld; + bool exec_opwrite; +}; + +/* + * This data type corresponds to the NAND controller properties which varies + * among different NAND controllers. 
+ * @ecc_modes - ecc mode for NAND + * @dev_cmd_reg_start - NAND_DEV_CMD_* registers starting offset + * @supports_bam - whether NAND controller is using BAM + * @nandc_part_of_qpic - whether NAND controller is part of qpic IP + * @qpic_version2 - flag to indicate QPIC IP version 2 + * @use_codeword_fixup - whether NAND has different layout for boot partitions + */ +struct qcom_nandc_props { + u32 ecc_modes; + u32 dev_cmd_reg_start; + u32 bam_offset; + bool supports_bam; + bool nandc_part_of_qpic; + bool qpic_version2; + bool use_codeword_fixup; +}; + +void qcom_free_bam_transaction(struct qcom_nand_controller *nandc); +struct bam_transaction *qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc); +void qcom_clear_bam_transaction(struct qcom_nand_controller *nandc); +void qcom_qpic_bam_dma_done(void *data); +void qcom_nandc_dev_to_mem(struct qcom_nand_controller *nandc, bool is_cpu); +int qcom_prepare_bam_async_desc(struct qcom_nand_controller *nandc, + struct dma_chan *chan, unsigned long flags); +int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, + int reg_off, const void *vaddr, int size, unsigned int flags); +int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read, + const void *vaddr, int size, unsigned int flags); +int qcom_prep_adm_dma_desc(struct qcom_nand_controller *nandc, bool read, int reg_off, + const void *vaddr, int size, bool flow_control); +int qcom_read_reg_dma(struct qcom_nand_controller *nandc, int first, int num_regs, + unsigned int flags); +int qcom_write_reg_dma(struct qcom_nand_controller *nandc, __le32 *vaddr, int first, + int num_regs, unsigned int flags); +int qcom_read_data_dma(struct qcom_nand_controller *nandc, int reg_off, const u8 *vaddr, + int size, unsigned int flags); +int qcom_write_data_dma(struct qcom_nand_controller *nandc, int reg_off, const u8 *vaddr, + int size, unsigned int flags); +int qcom_submit_descs(struct qcom_nand_controller *nandc); +void qcom_clear_read_regs(struct qcom_nand_controller *nandc); +void qcom_nandc_unalloc(struct qcom_nand_controller *nandc); +int qcom_nandc_alloc(struct qcom_nand_controller *nandc); +#endif + diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0e2f228e8b4a..07486168d104 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -21,7 +21,7 @@ struct nand_device; * @oobsize: OOB area size * @pages_per_eraseblock: number of pages per eraseblock * @eraseblocks_per_lun: number of eraseblocks per LUN (Logical Unit Number) - * @max_bad_eraseblocks_per_lun: maximum number of eraseblocks per LUN + * @max_bad_eraseblocks_per_lun: maximum number of bad eraseblocks per LUN * @planes_per_lun: number of planes per LUN * @luns_per_target: number of LUN per target (target is a synonym for die) * @ntargets: total number of targets exposed by the NAND device diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 702e5fb13dae..aba653207c0f 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -62,66 +62,116 @@ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_FROM_CACHE_OP(fast, addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1), \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, ...) 
\ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) + +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_OP_3A(fast, addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1), \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 2)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 2)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x3d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) 
\ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(3, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 2), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 2), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ - SPI_MEM_OP_ADDR(2, addr, 2), \ - SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x6d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP_3A(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ - SPI_MEM_OP_ADDR(3, addr, 2), \ - SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) - -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) 
\ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(3, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xed, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 4), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 4), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) + #define SPINAND_PROG_EXEC_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ @@ -268,6 +318,7 @@ extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; extern const struct spinand_manufacturer paragon_spinand_manufacturer; +extern const struct spinand_manufacturer skyhigh_spinand_manufacturer; extern const struct spinand_manufacturer toshiba_spinand_manufacturer; extern const struct spinand_manufacturer winbond_spinand_manufacturer; extern const struct spinand_manufacturer xtx_spinand_manufacturer; @@ -314,6 +365,7 @@ struct spinand_ecc_info { #define SPINAND_HAS_CR_FEAT_BIT BIT(1) #define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) #define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) +#define SPINAND_NO_RAW_ACCESS BIT(4) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure @@ -325,6 +377,67 @@ struct spinand_ondie_ecc_conf { }; /** + * struct spinand_otp_layout - structure to describe the SPI NAND OTP area + * @npages: number of pages in the OTP + * @start_page: start page of the user/factory OTP area. 
+ */ +struct spinand_otp_layout { + unsigned int npages; + unsigned int start_page; +}; + +/** + * struct spinand_fact_otp_ops - SPI NAND OTP methods for factory area + * @info: get the OTP area information + * @read: read from the SPI NAND OTP area + */ +struct spinand_fact_otp_ops { + int (*info)(struct spinand_device *spinand, size_t len, + struct otp_info *buf, size_t *retlen); + int (*read)(struct spinand_device *spinand, loff_t from, size_t len, + size_t *retlen, u8 *buf); +}; + +/** + * struct spinand_user_otp_ops - SPI NAND OTP methods for user area + * @info: get the OTP area information + * @lock: lock an OTP region + * @erase: erase an OTP region + * @read: read from the SPI NAND OTP area + * @write: write to the SPI NAND OTP area + */ +struct spinand_user_otp_ops { + int (*info)(struct spinand_device *spinand, size_t len, + struct otp_info *buf, size_t *retlen); + int (*lock)(struct spinand_device *spinand, loff_t from, size_t len); + int (*erase)(struct spinand_device *spinand, loff_t from, size_t len); + int (*read)(struct spinand_device *spinand, loff_t from, size_t len, + size_t *retlen, u8 *buf); + int (*write)(struct spinand_device *spinand, loff_t from, size_t len, + size_t *retlen, const u8 *buf); +}; + +/** + * struct spinand_fact_otp - SPI NAND OTP grouping structure for factory area + * @layout: OTP region layout + * @ops: OTP access ops + */ +struct spinand_fact_otp { + const struct spinand_otp_layout layout; + const struct spinand_fact_otp_ops *ops; +}; + +/** + * struct spinand_user_otp - SPI NAND OTP grouping structure for user area + * @layout: OTP region layout + * @ops: OTP access ops + */ +struct spinand_user_otp { + const struct spinand_otp_layout layout; + const struct spinand_user_otp_ops *ops; +}; + +/** * struct spinand_info - Structure used to describe SPI NAND chips * @model: model name * @devid: device ID @@ -339,6 +452,10 @@ struct spinand_ondie_ecc_conf { * @select_target: function used to select a target/die. Required only for * multi-die chips * @set_cont_read: enable/disable continuous cached reads + * @fact_otp: SPI NAND factory OTP info. + * @user_otp: SPI NAND user OTP info. + * @read_retries: the number of read retry modes supported + * @set_read_retry: enable/disable read retry for data recovery * * Each SPI NAND manufacturer driver should have a spinand_info table * describing all the chips supported by the driver. @@ -359,6 +476,11 @@ struct spinand_info { unsigned int target); int (*set_cont_read)(struct spinand_device *spinand, bool enable); + struct spinand_fact_otp fact_otp; + struct spinand_user_otp user_otp; + unsigned int read_retries; + int (*set_read_retry)(struct spinand_device *spinand, + unsigned int read_retry); }; #define SPINAND_ID(__method, ...) 
\ @@ -382,10 +504,32 @@ struct spinand_info { } #define SPINAND_SELECT_TARGET(__func) \ - .select_target = __func, + .select_target = __func #define SPINAND_CONT_READ(__set_cont_read) \ - .set_cont_read = __set_cont_read, + .set_cont_read = __set_cont_read + +#define SPINAND_FACT_OTP_INFO(__npages, __start_page, __ops) \ + .fact_otp = { \ + .layout = { \ + .npages = __npages, \ + .start_page = __start_page, \ + }, \ + .ops = __ops, \ + } + +#define SPINAND_USER_OTP_INFO(__npages, __start_page, __ops) \ + .user_otp = { \ + .layout = { \ + .npages = __npages, \ + .start_page = __start_page, \ + }, \ + .ops = __ops, \ + } + +#define SPINAND_READ_RETRY(__read_retries, __set_read_retry) \ + .read_retries = __read_retries, \ + .set_read_retry = __set_read_retry #define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ __flags, ...) \ @@ -437,6 +581,10 @@ struct spinand_dirmap { * actually relevant to enable this feature. * @set_cont_read: Enable/disable the continuous read feature * @priv: manufacturer private data + * @fact_otp: SPI NAND factory OTP info. + * @user_otp: SPI NAND user OTP info. + * @read_retries: the number of read retry modes supported + * @set_read_retry: Enable/disable the read retry feature */ struct spinand_device { struct nand_device base; @@ -469,6 +617,13 @@ struct spinand_device { bool cont_read_possible; int (*set_cont_read)(struct spinand_device *spinand, bool enable); + + const struct spinand_fact_otp *fact_otp; + const struct spinand_user_otp *user_otp; + + unsigned int read_retries; + int (*set_read_retry)(struct spinand_device *spinand, + unsigned int retry_mode); }; /** @@ -538,4 +693,26 @@ int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); +int spinand_wait(struct spinand_device *spinand, unsigned long initial_delay_us, + unsigned long poll_delay_us, u8 *s); + +int spinand_read_page(struct spinand_device *spinand, + const struct nand_page_io_req *req); + +int spinand_write_page(struct spinand_device *spinand, + const struct nand_page_io_req *req); + +size_t spinand_otp_page_size(struct spinand_device *spinand); +size_t spinand_fact_otp_size(struct spinand_device *spinand); +size_t spinand_user_otp_size(struct spinand_device *spinand); + +int spinand_fact_otp_read(struct spinand_device *spinand, loff_t ofs, + size_t len, size_t *retlen, u8 *buf); +int spinand_user_otp_read(struct spinand_device *spinand, loff_t ofs, + size_t len, size_t *retlen, u8 *buf); +int spinand_user_otp_write(struct spinand_device *spinand, loff_t ofs, + size_t len, size_t *retlen, const u8 *buf); + +int spinand_set_mtd_otp_ops(struct spinand_device *spinand); + #endif /* __LINUX_MTD_SPINAND_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2bf91b57591b..2143d05116be 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -202,4 +202,6 @@ DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +extern unsigned long mutex_get_owner(struct mutex *lock); + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 8ec8fed3bce8..bbaf55fb3101 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -18,35 +18,36 @@ enum { MAX_NESTED_LINKS = 8 }; enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk 
mode */ -#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ -#define LOOKUP_DIRECTORY 0x0002 /* require a directory */ -#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ -#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ -#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ -#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ - -#define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ -#define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ +#define LOOKUP_FOLLOW BIT(0) /* follow links at the end */ +#define LOOKUP_DIRECTORY BIT(1) /* require a directory */ +#define LOOKUP_AUTOMOUNT BIT(2) /* force terminal automount */ +#define LOOKUP_EMPTY BIT(3) /* accept empty path [user_... only] */ +#define LOOKUP_LINKAT_EMPTY BIT(4) /* Linkat request with empty path. */ +#define LOOKUP_DOWN BIT(5) /* follow mounts in the starting point */ +#define LOOKUP_MOUNTPOINT BIT(6) /* follow mounts in the end */ +#define LOOKUP_REVAL BIT(7) /* tell ->d_revalidate() to trust no cache */ +#define LOOKUP_RCU BIT(8) /* RCU pathwalk mode; semi-internal */ +#define LOOKUP_CACHED BIT(9) /* Only do cached lookup */ +#define LOOKUP_PARENT BIT(10) /* Looking up final parent in path */ +/* 5 spare bits for pathwalk */ /* These tell filesystem methods that we are dealing with the final component... */ -#define LOOKUP_OPEN 0x0100 /* ... in open */ -#define LOOKUP_CREATE 0x0200 /* ... in object creation */ -#define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ -#define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ +#define LOOKUP_OPEN BIT(16) /* ... in open */ +#define LOOKUP_CREATE BIT(17) /* ... in object creation */ +#define LOOKUP_EXCL BIT(18) /* ... in target must not exist */ +#define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */ -/* internal use only */ -#define LOOKUP_PARENT 0x0010 +/* 4 spare bits for intent */ /* Scoping flags for lookup. */ -#define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */ -#define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */ -#define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ -#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ -#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ -#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ -#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */ +#define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */ +#define LOOKUP_NO_MAGICLINKS BIT(25) /* No nd_jump_link() crossing. */ +#define LOOKUP_NO_XDEV BIT(26) /* No mountpoint crossing. */ +#define LOOKUP_BENEATH BIT(27) /* No escaping from starting point. */ +#define LOOKUP_IN_ROOT BIT(28) /* Treat dirfd as fs root. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. 
*/ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) +/* 3 spare bits for scoping */ extern int path_pts(struct path *path); @@ -61,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, diff --git a/include/linux/net.h b/include/linux/net.h index b75bc534c1b3..0ff950eecc6b 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -343,8 +343,6 @@ static inline bool sendpages_ok(struct page *page, size_t len, size_t offset) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); -int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, - struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h index 1c1332e4df26..13274c3def66 100644 --- a/include/linux/net/intel/iidc.h +++ b/include/linux/net/intel/iidc.h @@ -78,6 +78,8 @@ int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); +int ice_alloc_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); +void ice_free_rdma_qvector(struct ice_pf *pf, struct msix_entry *entry); /* Structure representing auxiliary driver tailored information about the core * PCI dev, each auxiliary driver using the IIDC interface will have an diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 662074b08c94..ff0758e88ea1 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -20,6 +20,33 @@ enum hwtstamp_source { }; /** + * struct hwtstamp_provider_desc - hwtstamp provider description + * + * @index: index of the hwtstamp provider. + * @qualifier: hwtstamp provider qualifier. + */ +struct hwtstamp_provider_desc { + int index; + enum hwtstamp_provider_qualifier qualifier; +}; + +/** + * struct hwtstamp_provider - hwtstamp provider object + * + * @rcu_head: RCU callback used to free the struct. + * @source: source of the hwtstamp provider. + * @phydev: pointer of the phydev source in case a PTP coming from phylib + * @desc: hwtstamp provider description. 
+ */ + +struct hwtstamp_provider { + struct rcu_head rcu_head; + enum hwtstamp_source source; + struct phy_device *phydev; + struct hwtstamp_provider_desc desc; +}; + +/** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * * @flags: see struct hwtstamp_config @@ -31,6 +58,7 @@ enum hwtstamp_source { * copied the ioctl request back to user space * @source: indication whether timestamps should come from the netdev or from * an attached phylib PHY + * @qualifier: qualifier of the hwtstamp provider * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -43,6 +71,7 @@ struct kernel_hwtstamp_config { struct ifreq *ifr; bool copied_to_user; enum hwtstamp_source source; + enum hwtstamp_provider_qualifier qualifier; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 11be70a7929f..7a01c518e573 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,12 +53,12 @@ enum { NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */ NETIF_F_GSO_FRAGLIST_BIT, /* ... Fraglist GSO */ + NETIF_F_GSO_ACCECN_BIT, /* TCP AccECN w/ TSO (no clear CWR) */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_FRAGLIST_BIT, + NETIF_F_GSO_ACCECN_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -128,6 +128,7 @@ enum { #define NETIF_F_SG __NETIF_F(SG) #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) +#define NETIF_F_GSO_ACCECN __NETIF_F(GSO_ACCECN) #define NETIF_F_TSO __NETIF_F(TSO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) @@ -210,7 +211,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) /* List of features with software fallbacks. 
*/ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ + NETIF_F_GSO_ACCECN | NETIF_F_GSO_SCTP | \ NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bb71ad82b42b..33338a233cc7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct dsa_port; struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; +struct netdev_config; struct netdev_name_node; struct sd_flow_limit; struct sfp_bus; @@ -82,6 +83,7 @@ struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; struct phy_link_topology; +struct hwtstamp_provider; typedef u32 xdp_features_t; @@ -338,11 +340,27 @@ struct gro_list { }; /* - * size of gro hash buckets, must less than bit number of - * napi_struct::gro_bitmask + * size of gro hash buckets, must be <= the number of bits in + * gro_node::bitmask */ #define GRO_HASH_BUCKETS 8 +/** + * struct gro_node - structure to support Generic Receive Offload + * @bitmask: bitmask to indicate used buckets in @hash + * @hash: hashtable of pending aggregated skbs, separated by flows + * @rx_list: list of pending ``GRO_NORMAL`` skbs + * @rx_count: cached current length of @rx_list + * @cached_napi_id: napi_struct::napi_id cached for hotpath, 0 for standalone + */ +struct gro_node { + unsigned long bitmask; + struct gro_list hash[GRO_HASH_BUCKETS]; + struct list_head rx_list; + u32 rx_count; + u32 cached_napi_id; +}; + /* * Structure for per-NAPI config */ @@ -350,6 +368,7 @@ struct napi_config { u64 gro_flush_timeout; u64 irq_suspend_timeout; u32 defer_hard_irqs; + cpumask_t affinity_mask; unsigned int napi_id; }; @@ -368,7 +387,6 @@ struct napi_struct { unsigned long state; int weight; u32 defer_hard_irqs_count; - unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL /* CPU actively polling if netpoll is configured */ @@ -377,20 +395,21 @@ struct napi_struct { /* CPU on which NAPI has been scheduled for processing */ int list_owner; struct net_device *dev; - struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; - struct list_head rx_list; /* Pending GRO_NORMAL skbs */ - int rx_count; /* length of rx_list */ - unsigned int napi_id; + struct gro_node gro; struct hrtimer timer; + /* all fields past this point are write-protected by netdev_lock */ struct task_struct *thread; unsigned long gro_flush_timeout; unsigned long irq_suspend_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ + u32 napi_id; struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + struct irq_affinity_notify notify; + int napi_rmap_idx; int index; struct napi_config *config; }; @@ -406,6 +425,7 @@ enum { NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ + NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */ }; enum { @@ -419,6 +439,7 @@ enum { NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), + NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER), }; enum gro_result { @@ -509,7 +530,7 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already 
scheduled. * - * Return True if NAPI is scheduled, False otherwise. + * Return: True if NAPI is scheduled, False otherwise. */ static inline bool napi_is_scheduled(struct napi_struct *n) { @@ -524,7 +545,7 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. - * Return true if we schedule a NAPI or false if not. + * Return: true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled. */ @@ -558,7 +579,7 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). - * Return false if device should avoid rearming interrupts. + * Return: false if device should avoid rearming interrupts. */ bool napi_complete_done(struct napi_struct *n, int work_done); @@ -569,16 +590,11 @@ static inline bool napi_complete(struct napi_struct *n) int dev_set_threaded(struct net_device *dev, bool threaded); -/** - * napi_disable - prevent NAPI from scheduling - * @n: NAPI context - * - * Stop NAPI from being scheduled on this context. - * Waits till any outstanding processing completes. - */ void napi_disable(struct napi_struct *n); +void napi_disable_locked(struct napi_struct *n); void napi_enable(struct napi_struct *n); +void napi_enable_locked(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running @@ -660,6 +676,7 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; + const struct attribute_group **groups; #endif unsigned long tx_maxrate; /* @@ -693,7 +710,7 @@ struct netdev_queue { * slow- / control-path part */ /* NAPI instance for the queue - * Readers and writers must hold RTNL + * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; @@ -995,9 +1012,13 @@ struct netdev_bpf { #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); @@ -1087,8 +1108,8 @@ struct netdev_net_notifier { * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the - * appletalk and ieee802154 subsystems but is no longer called by - * the device ioctl handler. + * ieee802154 subsystem but is no longer called by the device + * ioctl handler. * * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: @@ -1990,12 +2011,21 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ + * affinity. Set by netif_enable_irq_affinity(), then + * the driver must create a persistent napi by + * netif_napi_add_config() and finally bind the napi to + * IRQ (via netif_napi_set_irq()). 
+ * + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. + * Set by calling netif_enable_cpu_rmap(). + * * @see_all_hwtstamp_requests: device wants to see calls to * ndo_hwtstamp_set() for all timestamp requests * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN - * @netns_local: interface can't change network namespaces + * @netns_immutable: interface can't change network namespaces * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block @@ -2045,6 +2075,7 @@ enum netdev_reg_state { * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. + * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2396,11 +2427,13 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; + bool irq_affinity_auto; + bool rx_cpu_rmap_auto; /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; - unsigned long netns_local:1; + unsigned long netns_immutable:1; unsigned long fcoe_mtu:1; struct list_head net_notifier_list; @@ -2412,6 +2445,14 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + /** @cfg: net_device queue-related configuration */ + struct netdev_config *cfg; + /** + * @cfg_pending: same as @cfg but when device is being actively + * reconfigured includes any changes to the configuration + * requested by the user, but which may or may not be rejected. + */ + struct netdev_config *cfg_pending; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ @@ -2442,8 +2483,57 @@ struct net_device { u32 napi_defer_hard_irqs; /** - * @lock: protects @net_shaper_hierarchy, feel free to use for other - * netdev-scope protection. Ordering: take after rtnl_lock. + * @up: copy of @state's IFF_UP, but safe to read with just @lock. + * May report false negatives while the device is being opened + * or closed (@lock does not protect .ndo_open, or .ndo_close). + */ + bool up; + + /** + * @request_ops_lock: request the core to run all @netdev_ops and + * @ethtool_ops under the @lock. + */ + bool request_ops_lock; + + /** + * @lock: netdev-scope lock, protects a small selection of fields. + * Should always be taken using netdev_lock() / netdev_unlock() helpers. + * Drivers are free to use it for other protection. + * + * For the drivers that implement shaper or queue API, the scope + * of this lock is expanded to cover most ndo/queue/ethtool/sysfs + * operations. Drivers may opt-in to this behavior by setting + * @request_ops_lock. + * + * @lock protection mixes with rtnl_lock in multiple ways, fields are + * either: + * + * - simply protected by the instance @lock; + * + * - double protected - writers hold both locks, readers hold either; + * + * - ops protected - protected by the lock held around the NDOs + * and other callbacks, that is the instance lock on devices for + * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock; + * + * - double ops protected - always protected by rtnl_lock but for + * devices for which netdev_need_ops_lock() returns true - also + * the instance lock. 
+ * + * Simply protects: + * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, + * @net_shaper_hierarchy, @reg_state, @threaded + * + * Double protects: + * @up + * + * Double ops protects: + * @real_num_rx_queues, @real_num_tx_queues + * + * Also protects some fields in: + * struct napi_struct, struct netdev_queue, struct netdev_rx_queue + * + * Ordering: take after rtnl_lock. */ struct mutex lock; @@ -2457,6 +2547,8 @@ struct net_device { struct hlist_head neighbours[NEIGH_NR_TABLES]; + struct hwtstamp_provider __rcu *hwprov; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; @@ -2562,21 +2654,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } -#define netdev_lockdep_set_classes(dev) \ -{ \ - static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_xmit_lock_key; \ - static struct lock_class_key dev_addr_list_lock_key; \ - unsigned int i; \ - \ - (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - lockdep_set_class(&(dev)->addr_list_lock, \ - &dev_addr_list_lock_key); \ - for (i = 0; i < (dev)->num_tx_queues; i++) \ - lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ - &qdisc_xmit_lock_key); \ -} - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, @@ -2673,9 +2750,24 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi); +static inline void netdev_lock(struct net_device *dev) +{ + mutex_lock(&dev->lock); +} + +static inline void netdev_unlock(struct net_device *dev) +{ + mutex_unlock(&dev->lock); +} +/* Additional netdev_lock()-related helpers are in net/netdev_lock.h */ + +void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); + static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) { - napi->irq = irq; + netdev_lock(napi->dev); + netif_napi_set_irq_locked(napi, irq); + netdev_unlock(napi->dev); } /* Default NAPI poll() weight @@ -2683,8 +2775,19 @@ static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) */ #define NAPI_POLL_WEIGHT 64 -void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight); +void netif_napi_add_weight_locked(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight); + +static inline void +netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + netdev_lock(dev); + netif_napi_add_weight_locked(dev, napi, poll, weight); + netdev_unlock(dev); +} /** * netif_napi_add() - initialize a NAPI context @@ -2703,6 +2806,13 @@ netif_napi_add(struct net_device *dev, struct napi_struct *napi, } static inline void +netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int)) +{ + netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); +} + +static inline void netif_napi_add_tx_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), @@ -2712,6 +2822,15 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +static inline void +netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + 
napi->config = &dev->napi_config[index]; + netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); +} + /** * netif_napi_add_config - initialize a NAPI context with persistent config * @dev: network device @@ -2723,9 +2842,9 @@ static inline void netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int index) { - napi->index = index; - napi->config = &dev->napi_config[index]; - netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); + netdev_lock(dev); + netif_napi_add_config_locked(dev, napi, poll, index); + netdev_unlock(dev); } /** @@ -2745,6 +2864,8 @@ static inline void netif_napi_add_tx(struct net_device *dev, netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } +void __netif_napi_del_locked(struct napi_struct *napi); + /** * __netif_napi_del - remove a NAPI context * @napi: NAPI context @@ -2753,7 +2874,18 @@ static inline void netif_napi_add_tx(struct net_device *dev, * containing @napi. Drivers might want to call this helper to combine * all the needed RCU grace periods into a single one. */ -void __netif_napi_del(struct napi_struct *napi); +static inline void __netif_napi_del(struct napi_struct *napi) +{ + netdev_lock(napi->dev); + __netif_napi_del_locked(napi); + netdev_unlock(napi->dev); +} + +static inline void netif_napi_del_locked(struct napi_struct *napi) +{ + __netif_napi_del_locked(napi); + synchronize_net(); +} /** * netif_napi_del - remove a NAPI context @@ -2767,6 +2899,9 @@ static inline void netif_napi_del(struct napi_struct *napi) synchronize_net(); } +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs); +void netif_set_affinity_auto(struct net_device *dev); + struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ bool ignore_outgoing; @@ -2813,9 +2948,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); @@ -2860,6 +2995,46 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) u64_stats_update_end(&lstats->syncp); } +static inline void dev_dstats_rx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_packets); + u64_stats_add(&dstats->rx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_rx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_drops); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_packets); + u64_stats_add(&dstats->tx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_drops); + u64_stats_update_end(&dstats->syncp); +} + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ @@ -3160,9 +3335,12 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); +void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); +void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, @@ -3202,7 +3380,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); -void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, @@ -3216,7 +3393,6 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex, struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); -struct net_device *dev_get_by_napi_id(unsigned int napi_id); void netdev_copy_name(struct net_device *dev, char *name); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, @@ -3330,6 +3506,7 @@ struct softnet_data { }; DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +DECLARE_PER_CPU(struct page_pool *, system_page_pool); #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) @@ -3818,7 
+3995,7 @@ static inline bool netif_attr_test_mask(unsigned long j, * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * - * Returns true if a CPU/Rx queue is online. + * Returns: true if a CPU/Rx queue is online. */ static inline bool netif_attr_test_online(unsigned long j, const unsigned long *online_mask, @@ -3838,7 +4015,8 @@ static inline bool netif_attr_test_online(unsigned long j, * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set. + * Returns: next (after n) CPU/Rx queue index in the mask; + * >= nr_bits if no further CPUs/Rx queues set. */ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) @@ -3860,7 +4038,8 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set in both. + * Returns: next (after n) CPU/Rx queue index set in both masks; + * >= nr_bits if no further CPUs/Rx queues set in both. */ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, @@ -3907,17 +4086,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev) } int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); - -#ifdef CONFIG_SYSFS int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); -#else -static inline int netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxqs) -{ - dev->real_num_rx_queues = rxqs; - return 0; -} -#endif int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); @@ -3966,9 +4135,9 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog); -void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); + const struct bpf_prog *xdp_prog); +void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog); +int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); @@ -3976,10 +4145,15 @@ int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); -gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -void napi_gro_flush(struct napi_struct *napi, bool flush_old); +gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb); + +static inline gro_result_t napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb) +{ + return gro_receive_skb(&napi->gro, skb); +} + struct sk_buff *napi_get_frags(struct napi_struct *napi); -void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) @@ -4004,6 +4178,8 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd); 
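/*
 * Editorial aside, not part of the patch: a minimal sketch of how a driver
 * might combine the persistent NAPI config and IRQ-affinity additions shown
 * in the netdevice.h hunks above, mirroring the sequence described in the
 * @irq_affinity_auto kernel-doc.  The names my_poll(), my_queue, my_priv and
 * priv->irqs[] are hypothetical and exist only for illustration.
 */
#include <linux/netdevice.h>

struct my_queue {
	struct napi_struct napi;
};

struct my_priv {
	struct my_queue *q;
	int *irqs;
	unsigned int num_queues;
};

static int my_poll(struct napi_struct *napi, int budget)
{
	int done = 0;

	/* ... process up to @budget packets, counting them in done ... */
	if (done < budget)
		napi_complete_done(napi, done);
	return done;
}

static int my_setup_napis(struct net_device *dev, struct my_priv *priv)
{
	unsigned int i;
	int err;

	/* Let the core build and maintain the aRFS CPU rmap for our IRQs. */
	err = netif_enable_cpu_rmap(dev, priv->num_queues);
	if (err)
		return err;

	/* Ask the core to store and re-apply per-NAPI IRQ affinity masks. */
	netif_set_affinity_auto(dev);

	for (i = 0; i < priv->num_queues; i++) {
		/*
		 * Slot i of dev->napi_config[] keeps napi_id, defer_hard_irqs,
		 * gro_flush_timeout and the affinity mask across NAPI
		 * re-creation.
		 */
		netif_napi_add_config(dev, &priv->q[i].napi, my_poll, i);
		/* Bind the NAPI to its IRQ; the wrapper takes the netdev lock. */
		netif_napi_set_irq(&priv->q[i].napi, priv->irqs[i]);
	}
	return 0;
}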
int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg); int generic_hwtstamp_set_lower(struct net_device *dev, @@ -4013,22 +4189,25 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat, int new_ifindex); -static inline + const char *pat, int new_ifindex, + struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat) -{ - return __dev_change_net_namespace(dev, net, pat, 0); -} + const char *pat); int __dev_set_mtu(struct net_device *, int); +int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); +int netif_set_mac_address(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, @@ -4044,7 +4223,9 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); +u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); u32 dev_get_min_mp_channel_count(const struct net_device *dev); @@ -4111,7 +4292,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return 0; } -bool dev_nit_active(struct net_device *dev); +bool dev_nit_active_rcu(const struct net_device *dev); +static inline bool dev_nit_active(const struct net_device *dev) +{ + bool ret; + + rcu_read_lock(); + ret = dev_nit_active_rcu(dev); + rcu_read_unlock(); + return ret; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); static inline void __dev_put(struct net_device *dev) @@ -4242,6 +4433,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). 
*/ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4256,7 +4448,7 @@ static inline bool netif_carrier_ok(const struct net_device *dev) unsigned long dev_trans_start(struct net_device *dev); -void __netdev_watchdog_up(struct net_device *dev); +void netdev_watchdog_up(struct net_device *dev); void netif_carrier_on(struct net_device *dev); void netif_carrier_off(struct net_device *dev); @@ -4650,6 +4842,9 @@ int devm_register_netdev(struct device *dev, struct net_device *ndev); /* General hardware address lists handling functions */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); +int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, @@ -4781,8 +4976,11 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); +int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); @@ -5100,6 +5298,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_ACCECN != + (NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5897f3dbaf7c..f39f688d7285 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -357,7 +357,7 @@ extern struct static_key xt_tee_enabled; * Begin packet processing : all readers must wait the end * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) - * Returns : + * Returns: * 1 if no recursion on this cpu * 0 if recursion detected */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 8676316547cc..3175073a66ba 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -66,7 +66,6 @@ static inline bool nf_hook_egress_active(void) * @rc: result code which shall be returned by __dev_queue_xmit() on failure * @dev: netdev whose egress hooks shall be applied to @skb * - * Returns @skb on success or %NULL if the packet was consumed or filtered. * Caller must hold rcu_read_lock. * * On ingress, packets are classified first by tc, then by netfilter. @@ -81,6 +80,8 @@ static inline bool nf_hook_egress_active(void) * called recursively by tunnel drivers such as vxlan, the flag is reverted to * false after sch_handle_egress(). This ensures that netfilter is applied * both on the overlay and underlying network. 
+ * + * Returns: @skb on success or %NULL if the packet was consumed or filtered. */ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, struct net_device *dev) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ecdd5ced16a8..1464b3a10498 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -18,9 +18,11 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/uio.h> +#include <linux/rolling_buffer.h> enum netfs_sreq_ref_trace; typedef struct mempool_s mempool_t; +struct folio_queue; /** * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] @@ -49,8 +51,7 @@ enum netfs_io_source { NETFS_INVALID_WRITE, } __mode(byte); -typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, - bool was_async); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error); /* * Per-inode context. This wraps the VFS inode. @@ -71,6 +72,7 @@ struct netfs_inode { #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ +#define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */ }; /* @@ -178,9 +180,6 @@ struct netfs_io_subrequest { unsigned long long start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ - size_t consumed; /* Amount of read data consumed */ - size_t prev_donated; /* Amount of data donated from previous subreq */ - size_t next_donated; /* Amount of data donated from next subreq */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ @@ -188,9 +187,6 @@ struct netfs_io_subrequest { u8 retry_count; /* The number of retries (0 on initial pass) */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ - unsigned char curr_folioq_slot; /* Folio currently being read */ - unsigned char curr_folio_order; /* Order of folio */ - struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ @@ -208,9 +204,12 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ + NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ + NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_DIO_WRITE, /* This is a direct I/O write */ @@ -224,23 +223,24 @@ enum netfs_io_origin { */ struct netfs_io_request { union { - struct work_struct work; + struct work_struct cleanup_work; /* Deferred cleanup work */ struct rcu_head rcu; }; + struct work_struct work; /* Result collector work */ struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The 
mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ - struct list_head subrequests; /* Contributory I/O operations */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ - struct folio_queue *buffer; /* Head of I/O buffer */ - struct folio_queue *buffer_tail; /* Tail of I/O buffer */ - struct iov_iter iter; /* Unencrypted-side iterator */ - struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ + struct rolling_buffer buffer; /* Unencrypted buffer */ +#define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1 +#define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2 + wait_queue_head_t waitq; /* Processor waiter */ void *netfs_priv; /* Private data for the netfs */ void *netfs_priv2; /* Private data for the netfs */ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ @@ -251,35 +251,36 @@ struct netfs_io_request { atomic_t subreq_counter; /* Next subreq->debug_index */ unsigned int nr_group_rel; /* Number of refs to release on ->group */ spinlock_t lock; /* Lock for queuing subreqs */ - atomic_t nr_outstanding; /* Number of ops in progress */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ long error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ - u8 buffer_head_slot; /* First slot in ->buffer */ - u8 buffer_tail_slot; /* Next slot in ->buffer_tail */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ + unsigned long long abandon_to; /* Position to abandon folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ - size_t prev_donated; /* Fallback for subreq->prev_donated */ + unsigned char front_folio_order; /* Order (size) of front folio */ refcount_t ref; unsigned long flags; +#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ +#define 
NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; @@ -409,6 +410,13 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * struct netfs_group *netfs_group); ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); +/* Single, monolithic object read/write API. */ +void netfs_single_mark_inode_dirty(struct inode *inode); +ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter); +int netfs_writeback_single(struct address_space *mapping, + struct writeback_control *wbc, + struct iov_iter *iter); + /* Address operations API */ struct readahead_control; void netfs_readahead(struct readahead_control *); @@ -428,22 +436,19 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); /* (Sub)request management API. */ -void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, - bool was_async); -void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, - int error, bool was_async); +void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq); +void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); + enum netfs_sreq_ref_trace what); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); @@ -453,6 +458,18 @@ void netfs_end_io_write(struct inode *inode); int netfs_start_io_direct(struct inode *inode); void netfs_end_io_direct(struct inode *inode); +/* Miscellaneous APIs. */ +struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp, + unsigned int trace /*enum netfs_folioq_trace*/); +void netfs_folioq_free(struct folio_queue *folioq, + unsigned int trace /*enum netfs_trace_folioq*/); + +/* Buffer wrangling helpers API. */ +int netfs_alloc_folioq_buffer(struct address_space *mapping, + struct folio_queue **_buffer, + size_t *_cur_size, ssize_t size, gfp_t gfp); +void netfs_free_folioq_buffer(struct folio_queue *fq); + /** * netfs_inode - Get the netfs inode context from the inode * @inode: The inode to query diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b34301650c47..0477208ed9ff 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -25,7 +25,13 @@ union inet_addr { struct netpoll { struct net_device *dev; netdevice_tracker dev_tracker; + /* + * Either dev_name or dev_mac can be used to specify the local + * interface - dev_name is used if it is a nonempty string, else + * dev_mac is used. 
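As a rough usage sketch of the interface selection described in the comment above (the struct netpoll fields involved are listed just below): the module name, MAC address and ports here are invented, and netpoll_setup() is the long-standing setup entry point, not part of this hunk.

static struct netpoll example_np = {
	.name        = "example",
	.local_port  = 6665,
	.remote_port = 6666,
	.remote_mac  = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
};

static int example_netpoll_start(void)
{
	static const u8 mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };

	/* Leave dev_name empty so the new dev_mac field picks the interface. */
	example_np.dev_name[0] = '\0';
	memcpy(example_np.dev_mac, mac, ETH_ALEN);

	/* remote_ip/local_ip (union inet_addr) would also be filled in real use. */
	return netpoll_setup(&example_np);
}

Note that netpoll_send_udp() now returns an int (see the hunk below), so transmit failures can be reported to the caller.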
+ */ char dev_name[IFNAMSIZ]; + u8 dev_mac[ETH_ALEN]; const char *name; union inet_addr local_ip, remote_ip; @@ -33,6 +39,7 @@ struct netpoll { u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; struct sk_buff_head skb_pool; + struct work_struct refill_wq; }; struct netpoll_info { @@ -57,7 +64,7 @@ static inline void netpoll_poll_disable(struct net_device *dev) { return; } static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif -void netpoll_send_udp(struct netpoll *np, const char *msg, int len); +int netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 9ad727ddfedb..0906a0b40c6a 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -55,7 +55,6 @@ enum nfs3_stable_how { NFS_INVALID_STABLE_HOW = -1 }; -#ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -67,10 +66,4 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } -#else /* CONFIG_CRC32 */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d7430d9f218..d8cad844870a 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -17,6 +17,7 @@ #include <linux/uidgid.h> #include <uapi/linux/nfs4.h> #include <linux/sunrpc/msg_prot.h> +#include <linux/sunrpc/xdrgen/nfs4_1.h> enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, @@ -46,6 +47,7 @@ struct nfs4_acl { struct nfs4_label { uint32_t lfs; uint32_t pi; + u32 lsmid; u32 len; char *label; }; @@ -298,6 +300,7 @@ enum nfsstat4 { /* error codes for internal client use */ #define NFS4ERR_RESET_TO_MDS 12001 #define NFS4ERR_RESET_TO_PNFS 12002 +#define NFS4ERR_FATAL_IOERROR 12003 static inline bool seqid_mutating_err(u32 err) { @@ -365,7 +368,7 @@ enum limit_by4 { NFS4_LIMIT_BLOCKS = 2 }; -enum open_delegation_type4 { +enum nfs4_open_delegation_type4 { NFS4_OPEN_DELEGATE_NONE = 0, NFS4_OPEN_DELEGATE_READ = 1, NFS4_OPEN_DELEGATE_WRITE = 2, @@ -512,12 +515,6 @@ enum { FATTR4_XATTR_SUPPORT = 82, }; -enum { - FATTR4_TIME_DELEG_ACCESS = 84, - FATTR4_TIME_DELEG_MODIFY = 85, - FATTR4_OPEN_ARGUMENTS = 86, -}; - /* * The following internal definitions enable processing the above * attribute bits within 32-bit word boundaries. 
@@ -695,6 +692,7 @@ enum { NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, NFSPROC4_CLNT_READ_PLUS, + NFSPROC4_CLNT_OFFLOAD_STATUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 039898d70954..67ae2c3f41d2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,23 @@ struct nfs_lock_context { struct rcu_head rcu_head; }; +struct nfs_file_localio { + struct nfsd_file __rcu *ro_file; + struct nfsd_file __rcu *rw_file; + struct list_head list; + void __rcu *nfs_uuid; /* opaque pointer to 'nfs_uuid_t' */ +}; + +static inline void nfs_localio_file_init(struct nfs_file_localio *nfl) +{ +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + nfl->ro_file = NULL; + nfl->rw_file = NULL; + INIT_LIST_HEAD(&nfl->list); + nfl->nfs_uuid = NULL; +#endif +} + struct nfs4_state; struct nfs_open_context { struct nfs_lock_context lock_context; @@ -87,15 +104,16 @@ struct nfs_open_context { struct nfs4_state *state; fmode_t mode; + int error; unsigned long flags; #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) - int error; - struct list_head list; struct nfs4_threshold *mdsthreshold; + struct list_head list; struct rcu_head rcu_head; + struct nfs_file_localio nfl; }; struct nfs_open_dir_context { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b804346a9741..ee03f3cef30c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -50,7 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ -#define NFS_CS_LOCAL_IO 10 /* - client is local */ +#define NFS_CS_NETUNREACH_FATAL 10 /* - ENETUNREACH errors are fatal */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -132,7 +132,7 @@ struct nfs_client { struct timespec64 cl_nfssvc_boot; seqlock_t cl_boot_lock; nfs_uuid_t cl_uuid; - spinlock_t cl_localio_lock; + struct work_struct cl_local_probe_work; #endif /* CONFIG_NFS_LOCALIO */ }; @@ -168,6 +168,8 @@ struct nfs_server { #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 #define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 +#define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 +#define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ @@ -211,6 +213,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -234,9 +245,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ @@ -251,6 +259,10 @@ struct nfs_server { struct list_head 
ss_copies; struct list_head ss_src_copies; + unsigned long delegation_flags; +#define NFS4SERV_DELEGRETURN (1) +#define NFS4SERV_DELEGATION_EXPIRED (2) +#define NFS4SERV_DELEGRETURN_DELAYED (3) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; @@ -290,6 +302,7 @@ struct nfs_server { #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) +#define NFS_CAP_OFFLOAD_STATUS (1U << 9) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 559273a0f16d..67f6632f723b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1315,11 +1315,6 @@ struct nfs4_fsid_present_res { #endif /* CONFIG_NFS_V4 */ -struct nfstime4 { - u64 seconds; - u32 nseconds; -}; - #ifdef CONFIG_NFS_V4_1 struct pnfs_commit_bucket { @@ -1520,8 +1515,9 @@ struct nfs42_offload_status_args { struct nfs42_offload_status_res { struct nfs4_sequence_res osr_seq_res; - uint64_t osr_count; - int osr_status; + u64 osr_count; + int complete_count; + u32 osr_complete; }; struct nfs42_copy_notify_args { @@ -1637,6 +1633,7 @@ enum { NFS_IOHDR_RESEND_PNFS, NFS_IOHDR_RESEND_MDS, NFS_IOHDR_UNSTABLE_WRITES, + NFS_IOHDR_ODIRECT, }; struct nfs_io_completion; @@ -1785,7 +1782,7 @@ struct nfs_rpc_ops { struct nfs_fattr *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct inode *, struct dentry *, + int (*lookup) (struct inode *, struct dentry *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); @@ -1806,7 +1803,7 @@ struct nfs_rpc_ops { int (*link) (struct inode *, struct inode *, const struct qstr *); int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); - int (*mkdir) (struct inode *, struct dentry *, struct iattr *); + struct dentry *(*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *); int (*mknod) (struct inode *, struct dentry *, struct iattr *, diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 9202f4b24343..5c7c92659e73 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,9 +6,6 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H -/* nfsd_file structure is purposely kept opaque to NFS client */ -struct nfsd_file; - #if IS_ENABLED(CONFIG_NFS_LOCALIO) #include <linux/module.h> @@ -19,6 +16,9 @@ struct nfsd_file; #include <linux/nfs.h> #include <net/net_namespace.h> +struct nfs_client; +struct nfs_file_localio; + /* * Useful to allow a client to negotiate if localio * possible with its server. 
@@ -27,35 +27,43 @@ struct nfsd_file; */ typedef struct { uuid_t uuid; + unsigned nfs3_localio_probe_count; + /* this struct is over a cacheline, avoid bouncing */ + spinlock_t ____cacheline_aligned lock; struct list_head list; + spinlock_t *list_lock; /* nn->local_clients_lock */ struct net __rcu *net; /* nfsd's network namespace */ struct auth_domain *dom; /* auth_domain for localio */ + /* Local files to close when net is shut down or exports change */ + struct list_head files; } nfs_uuid_t; void nfs_uuid_init(nfs_uuid_t *); bool nfs_uuid_begin(nfs_uuid_t *); void nfs_uuid_end(nfs_uuid_t *); -void nfs_uuid_is_local(const uuid_t *, struct list_head *, +void nfs_uuid_is_local(const uuid_t *, struct list_head *, spinlock_t *, struct net *, struct auth_domain *, struct module *); -void nfs_uuid_invalidate_clients(struct list_head *list); -void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); + +void nfs_localio_enable_client(struct nfs_client *clp); +void nfs_localio_disable_client(struct nfs_client *clp); +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ -extern struct nfsd_file * -nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, - const struct cred *, const struct nfs_fh *, - const fmode_t) __must_hold(rcu); +void nfs_close_local_fh(struct nfs_file_localio *); struct nfsd_localio_operations { - bool (*nfsd_serv_try_get)(struct net *); - void (*nfsd_serv_put)(struct net *); + bool (*nfsd_net_try_get)(struct net *); + void (*nfsd_net_put)(struct net *); struct nfsd_file *(*nfsd_open_local_fh)(struct net *, struct auth_domain *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, + struct nfsd_file __rcu **pnf, const fmode_t); - struct net *(*nfsd_file_put_local)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); + struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -64,39 +72,51 @@ extern const struct nfsd_localio_operations *nfs_to; struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, - const struct nfs_fh *, const fmode_t); + const struct nfs_fh *, struct nfs_file_localio *, + struct nfsd_file __rcu **pnf, + const fmode_t); static inline void nfs_to_nfsd_net_put(struct net *net) { /* - * Once reference to nfsd_serv is dropped, NFSD could be - * unloaded, so ensure safe return from nfsd_file_put_local() - * by always taking RCU. + * Once reference to net (and associated nfsd_serv) is dropped, NFSD + * could be unloaded, so ensure safe return from nfsd_net_put() by + * always taking RCU. */ rcu_read_lock(); - nfs_to->nfsd_serv_put(net); + nfs_to->nfsd_net_put(net); rcu_read_unlock(); } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file __rcu **localio) { /* - * Must not hold RCU otherwise nfsd_file_put() can easily trigger: - * "Voluntary context switch within RCU read-side critical section!" - * by scheduling deep in underlying filesystem (e.g. XFS). + * Either *localio must be guaranteed to be non-NULL, or caller + * must prevent nfsd shutdown from completing as nfs_close_local_fh() + * does by blocking the nfs_uuid from being finally put. 
*/ - struct net *net = nfs_to->nfsd_file_put_local(localio); + struct net *net; + + net = nfs_to->nfsd_file_put_local(localio); - nfs_to_nfsd_net_put(net); + if (net) + nfs_to_nfsd_net_put(net); } #else /* CONFIG_NFS_LOCALIO */ + +struct nfs_file_localio; +static inline void nfs_close_local_fh(struct nfs_file_localio *nfl) +{ +} static inline void nfsd_localio_ops_init(void) { } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +struct nfs_client; +static inline void nfs_localio_disable_client(struct nfs_client *clp) { } + #endif /* CONFIG_NFS_LOCALIO */ #endif /* __LINUX_NFSLOCALIO_H */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a8dfb38c9bb6..e78fa535f61d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -17,7 +17,6 @@ void lockup_detector_init(void); void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); -void lockup_detector_cleanup(void); extern int watchdog_user_enabled; extern int watchdog_thresh; @@ -37,7 +36,6 @@ extern int sysctl_hardlockup_all_cpu_backtrace; static inline void lockup_detector_init(void) { } static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } -static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -104,12 +102,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); -extern void hardlockup_detector_perf_cleanup(void); extern void hardlockup_config_perf_event(const char *str); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } -static inline void hardlockup_detector_perf_cleanup(void) { } static inline void hardlockup_config_perf_event(const char *str) { } #endif diff --git a/include/linux/node.h b/include/linux/node.h index 9a881c2208b3..2b7517892230 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -57,6 +57,11 @@ enum cache_write_policy { NODE_CACHE_WRITE_OTHER, }; +enum cache_mode { + NODE_CACHE_ADDR_MODE_RESERVED, + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR, +}; + /** * struct node_cache_attrs - system memory caching attributes * @@ -65,6 +70,7 @@ enum cache_write_policy { * @size: Total size of cache in bytes * @line_size: Number of bytes fetched on a cache miss * @level: The cache hierarchy level + * @address_mode: The address mode */ struct node_cache_attrs { enum cache_indexing indexing; @@ -72,6 +78,7 @@ struct node_cache_attrs { u64 size; u16 line_size; u8 level; + u16 address_mode; }; #ifdef CONFIG_HMEM_REPORTING diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 9fd7a0ce9c1a..f0ac0633366b 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -94,7 +94,6 @@ #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> -#include <linux/numa.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; @@ -191,6 +190,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) +static __always_inline void __nodes_copy(nodemask_t *dstp, + const nodemask_t *srcp, unsigned int nbits) +{ + bitmap_copy(dstp->bits, srcp->bits, nbits); +} + #define nodes_complement(dst, src) \ 
__nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h index 6b28d97ea6ed..f850a48742f1 100644 --- a/include/linux/nodemask_types.h +++ b/include/linux/nodemask_types.h @@ -3,7 +3,16 @@ #define __LINUX_NODEMASK_TYPES_H #include <linux/bitops.h> -#include <linux/numa.h> + +#ifdef CONFIG_NODES_SHIFT +#define NODES_SHIFT CONFIG_NODES_SHIFT +#else +#define NODES_SHIFT 0 +#endif + +#define MAX_NUMNODES (1 << NODES_SHIFT) + +#define NUMA_NO_NODE (-1) typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/numa.h b/include/linux/numa.h index 3567e40329eb..e6baaf6051bc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -3,16 +3,8 @@ #define _LINUX_NUMA_H #include <linux/init.h> #include <linux/types.h> +#include <linux/nodemask.h> -#ifdef CONFIG_NODES_SHIFT -#define NODES_SHIFT CONFIG_NODES_SHIFT -#else -#define NODES_SHIFT 0 -#endif - -#define MAX_NUMNODES (1 << NODES_SHIFT) - -#define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) static inline bool numa_valid_node(int nid) @@ -39,6 +31,8 @@ void __init alloc_offline_node_data(int nid); /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); +int nearest_node_nodemask(int node, nodemask_t *mask); + #ifndef memory_add_physaddr_to_nid int memory_add_physaddr_to_nid(u64 start); #endif @@ -55,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state) return NUMA_NO_NODE; } +static inline int nearest_node_nodemask(int node, nodemask_t *mask) +{ + return NUMA_NO_NODE; +} + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index cfad6ce7e1bd..dd85613cdd86 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -29,7 +29,10 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi); int __init numa_memblks_init(int (*init_func)(void), bool memblock_force_top_down); +extern int numa_distance_cnt; + #ifdef CONFIG_NUMA_EMU +extern int emu_nid_to_phys[MAX_NUMNODES]; int numa_emu_cmdline(char *str); void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, unsigned int nr_emu_nids); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index c1d0bc5d9624..60e069a6757f 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -40,5 +40,12 @@ int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, u8 *ctrl_key, size_t ctrl_key_len, u8 *sess_key, size_t sess_key_len); +int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, + u8 *c1, u8 *c2, size_t hash_len, + u8 **ret_psk, size_t *ret_len); +int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, + char *subsysnqn, char *hostnqn, u8 **ret_digest); +int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len, + u8 *psk_digest, u8 **ret_psk); #endif /* _NVME_AUTH_H */ diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 19d2b256180f..ab8971afa973 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,15 +6,25 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H +#include <linux/key.h> + #if IS_ENABLED(CONFIG_NVME_KEYRING) +struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, const char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest); key_serial_t 
nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); struct key *nvme_tls_key_lookup(key_serial_t key_id); #else - +static inline struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest) +{ + return ERR_PTR(-ENOTSUPP); +} static inline key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn) { diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 13377dde4527..5d7afb6079f1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -64,6 +64,7 @@ enum { /* Transport Type codes for Discovery Log Page entry TRTYPE field */ enum { + NVMF_TRTYPE_PCI = 0, /* PCI */ NVMF_TRTYPE_RDMA = 1, /* RDMA */ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ NVMF_TRTYPE_TCP = 3, /* TCP/IP */ @@ -198,28 +199,54 @@ enum { #define NVME_NVM_IOSQES 6 #define NVME_NVM_IOCQES 4 +/* + * Controller Configuration (CC) register (Offset 14h) + */ enum { + /* Enable (EN): bit 0 */ NVME_CC_ENABLE = 1 << 0, NVME_CC_EN_SHIFT = 0, + + /* Bits 03:01 are reserved (NVMe Base Specification rev 2.1) */ + + /* I/O Command Set Selected (CSS): bits 06:04 */ NVME_CC_CSS_SHIFT = 4, - NVME_CC_MPS_SHIFT = 7, - NVME_CC_AMS_SHIFT = 11, - NVME_CC_SHN_SHIFT = 14, - NVME_CC_IOSQES_SHIFT = 16, - NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT, - NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, + + /* Memory Page Size (MPS): bits 10:07 */ + NVME_CC_MPS_SHIFT = 7, + NVME_CC_MPS_MASK = 0xf << NVME_CC_MPS_SHIFT, + + /* Arbitration Mechanism Selected (AMS): bits 13:11 */ + NVME_CC_AMS_SHIFT = 11, + NVME_CC_AMS_MASK = 7 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, + + /* Shutdown Notification (SHN): bits 15:14 */ + NVME_CC_SHN_SHIFT = 14, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, - NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + + /* I/O Submission Queue Entry Size (IOSQES): bits 19:16 */ + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOSQES_MASK = 0xf << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + + /* I/O Completion Queue Entry Size (IOCQES): bits 23:20 */ + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_IOCQES_MASK = 0xf << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + + /* Controller Ready Independent of Media Enable (CRIME): bit 24 */ NVME_CC_CRIME = 1 << 24, + + /* Bits 25:31 are reserved (NVMe Base Specification rev 2.1) */ }; enum { @@ -275,6 +302,7 @@ enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), NVME_CTRL_ATTR_TBKAS = (1 << 6), NVME_CTRL_ATTR_ELBAS = (1 << 15), + NVME_CTRL_ATTR_RHII = (1 << 18), }; struct nvme_id_ctrl { @@ -1744,6 +1772,13 @@ enum { NVME_AUTH_DHGROUP_INVALID = 0xff, }; +enum { + NVME_AUTH_SECP_NOSC = 0x00, + NVME_AUTH_SECP_SC = 0x01, + NVME_AUTH_SECP_NEWTLSPSK = 0x02, + NVME_AUTH_SECP_REPLACETLSPSK = 0x03, +}; + union nvmf_auth_protocol { struct nvmf_auth_dhchap_protocol_descriptor dhchap; }; @@ -1896,6 +1931,46 @@ static inline bool nvme_is_fabrics(const struct nvme_command *cmd) return cmd->common.opcode == nvme_fabrics_command; } +#ifdef CONFIG_NVME_VERBOSE_ERRORS +const char *nvme_get_error_status_str(u16 status); 
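As a rough illustration of how the CC field definitions above combine into a register value, loosely following what an NVMe host driver does before enabling the controller (the helper name and page-shift parameter are assumptions for this sketch, not part of the patch):

static u32 example_build_cc(unsigned int page_shift)
{
	u32 cc = 0;

	cc |= NVME_CC_CSS_NVM;				/* NVM command set */
	cc |= (page_shift - 12) << NVME_CC_MPS_SHIFT;	/* MPS encodes a 2^(12 + MPS) byte page */
	cc |= NVME_CC_AMS_RR;				/* round-robin arbitration */
	cc |= NVME_CC_IOSQES | NVME_CC_IOCQES;		/* 64-byte SQEs, 16-byte CQEs */

	/* Real drivers typically program these fields first and set EN separately. */
	return cc | NVME_CC_ENABLE;
}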
+const char *nvme_get_opcode_str(u8 opcode); +const char *nvme_get_admin_opcode_str(u8 opcode); +const char *nvme_get_fabrics_opcode_str(u8 opcode); +#else /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const char *nvme_get_error_status_str(u16 status) +{ + return "I/O Error"; +} +static inline const char *nvme_get_opcode_str(u8 opcode) +{ + return "I/O Cmd"; +} +static inline const char *nvme_get_admin_opcode_str(u8 opcode) +{ + return "Admin Cmd"; +} + +static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) +{ + return "Fabrics Cmd"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ + +static inline const char *nvme_opcode_str(int qid, u8 opcode) +{ + return qid ? nvme_get_opcode_str(opcode) : + nvme_get_admin_opcode_str(opcode); +} + +static inline const char *nvme_fabrics_opcode_str( + int qid, const struct nvme_command *cmd) +{ + if (nvme_is_fabrics(cmd)) + return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); + + return nvme_opcode_str(qid, cmd->common.opcode); +} + struct nvme_error_slot { __le64 error_count; __le16 sqid; @@ -2019,7 +2094,7 @@ enum { NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, - NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + NVME_SC_CMD_SIZE_LIM_EXCEEDED = 0x183, /* * I/O Command Set Specific - Fabrics commands: diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 3ebeaa0ded00..515676ebe598 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -92,8 +92,8 @@ struct nvmem_cell_info { * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. * @of_node: If given, this will be used instead of the parent's of_node. - * @reg_read: Callback to read data. - * @reg_write: Callback to write data. + * @reg_read: Callback to read data; return zero if successful. + * @reg_write: Callback to write data; return zero if successful. * @size: Device size. * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. diff --git a/include/linux/objpool.h b/include/linux/objpool.h index cb1758eaa2d3..b713a1fe7521 100644 --- a/include/linux/objpool.h +++ b/include/linux/objpool.h @@ -170,17 +170,16 @@ static inline void *objpool_pop(struct objpool_head *pool) { void *obj = NULL; unsigned long flags; - int i, cpu; + int start, cpu; /* disable local irq to avoid preemption & interruption */ raw_local_irq_save(flags); - cpu = raw_smp_processor_id(); - for (i = 0; i < pool->nr_possible_cpus; i++) { + start = raw_smp_processor_id(); + for_each_possible_cpu_wrap(cpu, start) { obj = __objpool_try_get_slot(pool, cpu); if (obj) break; - cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1); } raw_local_irq_restore(flags); diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b3b8d3dab52d..366ad004d794 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -45,35 +45,31 @@ #define STACK_FRAME_NON_STANDARD_FP(func) #endif -#define ANNOTATE_NOENDBR \ - "986: \n\t" \ - ".pushsection .discard.noendbr\n\t" \ - ".long 986b\n\t" \ - ".popsection\n\t" - #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long 998b\n\t" \ ".popsection\n\t" -#else /* __ASSEMBLY__ */ +#define __ASM_BREF(label) label ## b -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. 
- */ -#define ANNOTATE_INTRA_FUNCTION_CALL \ - 999: \ - .pushsection .discard.intra_function_calls; \ - .long 999b; \ - .popsection; +#define __ASM_ANNOTATE(label, type) \ + ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ + ".long " __stringify(label) " - .\n\t" \ + ".long " __stringify(type) "\n\t" \ + ".popsection\n\t" + +#define ASM_ANNOTATE(type) \ + "911:\n\t" \ + __ASM_ANNOTATE(911b, type) + +#else /* __ASSEMBLY__ */ /* * In asm, there are two kinds of code: normal C-type callable functions and * the rest. The normal callable functions can be called by other code, and * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * are annotated with SYM_FUNC_{START,END}. Most asm code falls in this * category. In this case, no special debugging annotations are needed because * objtool can automatically generate the ORC data for the ORC unwinder to read * at runtime. @@ -115,34 +111,11 @@ #endif .endm -.macro ANNOTATE_NOENDBR +.macro ANNOTATE type:req .Lhere_\@: - .pushsection .discard.noendbr - .long .Lhere_\@ - .popsection -.endm - -/* - * Use objtool to validate the entry requirement that all code paths do - * VALIDATE_UNRET_END before RET. - * - * NOTE: The macro must be used at the beginning of a global symbol, otherwise - * it will be ignored. - */ -.macro VALIDATE_UNRET_BEGIN -#if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) -.Lhere_\@: - .pushsection .discard.validate_unret + .pushsection .discard.annotate_insn,"M",@progbits,8 .long .Lhere_\@ - . - .popsection -#endif -.endm - -.macro REACHABLE -.Lhere_\@: - .pushsection .discard.reachable - .long .Lhere_\@ + .long \type .popsection .endm @@ -155,20 +128,79 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) -#define ANNOTATE_NOENDBR -#define ASM_REACHABLE +#define __ASM_ANNOTATE(label, type) "" +#define ASM_ANNOTATE(type) #else -#define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -.macro ANNOTATE_NOENDBR -.endm -.macro REACHABLE +.macro ANNOTATE type:req .endm #endif #endif /* CONFIG_OBJTOOL */ +#ifndef __ASSEMBLY__ +/* + * Annotate away the various 'relocation to !ENDBR` complaints; knowing that + * these relocations will never be used for indirect calls. + */ +#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) +#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR)) + +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) +/* + * See linux/instrumentation.h + */ +#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) +#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. 
+ */ +#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL) +/* + * Use objtool to validate the entry requirement that all code paths do + * VALIDATE_UNRET_END before RET. + * + * NOTE: The macro must be used at the beginning of a global symbol, otherwise + * it will be ignored. + */ +#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) +/* + * This should be used to refer to an instruction that is considered + * terminating, like a noreturn CALL or UD2 when we know they are not -- eg + * WARN using UD2. + */ +#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) + +#else +#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR +#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE +/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */ +/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */ +#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS +#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL +#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#endif + +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) +#define VALIDATE_UNRET_BEGIN ANNOTATE_UNRET_BEGIN +#else +#define VALIDATE_UNRET_BEGIN +#endif + #endif /* _LINUX_OBJTOOL_H */ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 453a4f4ef39d..df5d9fa84dba 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -54,4 +54,16 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/include/linux/of.h b/include/linux/of.h index f921786cb8ac..a62154aeda1b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -67,7 +67,7 @@ struct device_node { #endif }; -#define MAX_PHANDLE_ARGS 16 +#define MAX_PHANDLE_ARGS NR_FWNODE_REFERENCE_ARGS struct of_phandle_args { struct device_node *np; int args_count; @@ -301,6 +301,8 @@ extern struct device_node *of_get_compatible_child(const struct device_node *par const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); +extern struct device_node *of_get_available_child_by_name(const struct device_node *node, + const char *name); /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); @@ -311,8 +313,12 @@ extern struct device_node *of_find_node_with_property( extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); +extern bool of_property_read_bool(const struct device_node *np, const char *propname); extern int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size); +extern int of_property_read_u16_index(const struct device_node *np, + const char *propname, + u32 index, u16 *out_value); extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); @@ -397,7 +403,6 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size); 
-extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); @@ -578,6 +583,13 @@ static inline struct device_node *of_get_child_by_name( return NULL; } +static inline struct device_node *of_get_available_child_by_name( + const struct device_node *node, + const char *name) +{ + return NULL; +} + static inline int of_device_is_compatible(const struct device_node *device, const char *name) { @@ -615,12 +627,24 @@ static inline struct device_node *of_find_compatible_node( return NULL; } +static inline bool of_property_read_bool(const struct device_node *np, + const char *propname) +{ + return false; +} + static inline int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { return -ENOSYS; } +static inline int of_property_read_u16_index(const struct device_node *np, + const char *propname, u32 index, u16 *out_value) +{ + return -ENOSYS; +} + static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { @@ -902,12 +926,6 @@ static inline const void *of_device_get_match_data(const struct device *dev) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif -static inline int of_prop_val_eq(const struct property *p1, const struct property *p2) -{ - return p1->length == p2->length && - !memcmp(p1->value, p2->value, (size_t)p1->length); -} - #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; pp = pp->next) @@ -1243,24 +1261,6 @@ static inline int of_property_read_string_index(const struct device_node *np, } /** - * of_property_read_bool - Find a property - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * - * Search for a boolean property in a device node. Usage on non-boolean - * property types is deprecated. - * - * Return: true if the property exists false otherwise. - */ -static inline bool of_property_read_bool(const struct device_node *np, - const char *propname) -{ - const struct property *prop = of_find_property(np, propname, NULL); - - return prop ? true : false; -} - -/** * of_property_present - Test if a property is present in a node * @np: device node to search for the property. * @propname: name of the property to be searched. @@ -1271,7 +1271,9 @@ static inline bool of_property_read_bool(const struct device_node *np, */ static inline bool of_property_present(const struct device_node *np, const char *propname) { - return of_property_read_bool(np, propname); + struct property *prop = of_find_property(np, propname, NULL); + + return prop ? 
true : false; } /** diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 9e034363788a..0cff90365391 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -26,6 +26,7 @@ struct of_pci_range { u64 bus_addr; }; u64 cpu_addr; + u64 parent_bus_addr; u64 size; u32 flags; }; diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index a2ff1ad48f7f..17471ef8e092 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -47,8 +47,6 @@ struct of_dev_auxdata { { .compatible = _compat, .phys_addr = _phys, .name = _name, \ .platform_data = _pdata } -extern const struct of_device_id of_default_bus_match_table[]; - /* Platform drivers register/unregister */ extern struct platform_device *of_device_alloc(struct device_node *np, const char *bus_id, diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index c9e3843d2dd5..263b915df1fb 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -66,10 +66,6 @@ extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct device_node; -extern int gpmc_get_client_irq(unsigned irq_config); - -extern unsigned int gpmc_ticks_to_ns(unsigned int ticks); - extern void gpmc_cs_write_reg(int cs, int idx, u32 val); extern int gpmc_calc_divider(unsigned int sync_clk); extern int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t, diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe867..89e9d6049883 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -389,25 +389,38 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) struct_size((type *)NULL, member, count) /** - * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. - * Enables caller macro to pass (different) initializer. + * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass arbitrary trailing expressions * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @trailer: Trailing expressions for attributes and/or initializers. */ -#define _DEFINE_FLEX(type, name, member, count, initializer...) \ +#define __DEFINE_FLEX(type, name, member, count, trailer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u trailer; \ type *name = (type *)&name##_u /** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + __DEFINE_FLEX(type, name, member, count, = { .obj initializer }) + +/** * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member, when it does not have a __counted_by annotation. 
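A brief sketch of the updated on-stack flexible-array helpers (the structure and element count are invented for illustration; DEFINE_FLEX() pairs naturally with a __counted_by() member as shown):

struct example_filter {
	u32 count;
	u32 rules[] __counted_by(count);
};

static void example_filter_use(void)
{
	/* Stack storage for 8 elements; the new initializer form sets
	 * .count = 8 inside the embedded object (= { .COUNTER = COUNT, }). */
	DEFINE_FLEX(struct example_filter, ef, rules, count, 8);
	size_t i;

	for (i = 0; i < ef->count; i++)
		ef->rules[i] = 0;
}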
* @@ -421,7 +434,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Use __struct_size(@name) to get compile-time size of it afterwards. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ - _DEFINE_FLEX(type, name, member, count, = {}) + __DEFINE_FLEX(type, name, member, count, = { }) /** * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing @@ -438,6 +451,6 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Use __struct_size(@NAME) to get compile-time size of it afterwards. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ - _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/packing.h b/include/linux/packing.h index 5d36dcd06f60..0589d70bbe04 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -8,6 +8,83 @@ #include <linux/types.h> #include <linux/bitops.h> +#define GEN_PACKED_FIELD_STRUCT(__type) \ + struct packed_field_ ## __type { \ + __type startbit; \ + __type endbit; \ + __type offset; \ + __type size; \ + } + +/* struct packed_field_u8. Use with bit offsets < 256, buffers < 32B and + * unpacked structures < 256B. + */ +GEN_PACKED_FIELD_STRUCT(u8); + +/* struct packed_field_u16. Use with bit offsets < 65536, buffers < 8KB and + * unpacked structures < 64KB. + */ +GEN_PACKED_FIELD_STRUCT(u16); + +#define PACKED_FIELD(start, end, struct_name, struct_field) \ +{ \ + (start), \ + (end), \ + offsetof(struct_name, struct_field), \ + sizeof_field(struct_name, struct_field), \ +} + +#define CHECK_PACKED_FIELD_OVERLAP(fields, index1, index2) ({ \ + typeof(&(fields)[0]) __f = (fields); \ + typeof(__f[0]) _f1 = __f[index1]; typeof(__f[0]) _f2 = __f[index2]; \ + const bool _ascending = __f[0].startbit < __f[1].startbit; \ + BUILD_BUG_ON_MSG(_ascending && _f1.startbit >= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks ascending order"); \ + BUILD_BUG_ON_MSG(!_ascending && _f1.startbit <= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks descending order"); \ + BUILD_BUG_ON_MSG(max(_f1.endbit, _f2.endbit) <= \ + min(_f1.startbit, _f2.startbit), \ + __stringify(fields) " field " __stringify(index2) \ + " overlaps with previous field"); \ +}) + +#define CHECK_PACKED_FIELD(fields, index) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(_f[0]) __f = _f[index]; \ + BUILD_BUG_ON_MSG(__f.startbit < __f.endbit, \ + __stringify(fields) " field " __stringify(index) \ + " start bit must not be smaller than end bit"); \ + BUILD_BUG_ON_MSG(__f.size != 1 && __f.size != 2 && \ + __f.size != 4 && __f.size != 8, \ + __stringify(fields) " field " __stringify(index) \ + " has unsupported unpacked storage size"); \ + BUILD_BUG_ON_MSG(__f.startbit - __f.endbit >= BITS_PER_BYTE * __f.size, \ + __stringify(fields) " field " __stringify(index) \ + " exceeds unpacked storage size"); \ + __builtin_choose_expr(index != 0, \ + CHECK_PACKED_FIELD_OVERLAP(fields, index - 1, index), \ + 1); \ +}) + +/* Note that the packed fields may be either in ascending or descending order. + * Thus, we must check that both the first and last field fit within the + * packed buffer size. 
+ */ +#define CHECK_PACKED_FIELDS_SIZE(fields, pbuflen) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(pbuflen) _len = (pbuflen); \ + const size_t num_fields = ARRAY_SIZE(fields); \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_len), \ + __stringify(fields) " pbuflen " __stringify(pbuflen) \ + " must be a compile time constant"); \ + BUILD_BUG_ON_MSG(_f[0].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " first field exceeds packed buffer size"); \ + BUILD_BUG_ON_MSG(_f[num_fields - 1].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " last field exceeds packed buffer size"); \ +}) + #define QUIRK_MSB_ON_THE_RIGHT BIT(0) #define QUIRK_LITTLE_ENDIAN BIT(1) #define QUIRK_LSW32_IS_FIRST BIT(2) @@ -26,4 +103,352 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks); +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +/* Do not hand-edit the following packed field check macros! + * + * They are generated using scripts/gen_packed_field_checks.c, which may be + * built via "make scripts_gen_packed_field_checks". If larger macro sizes are + * needed in the future, please use this program to re-generate the macros and + * insert them here. 
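A hedged sketch of the new field-packing helpers declared above (the descriptor layout, structure and function names are invented; CHECK_PACKED_FIELDS() expands to the generated CHECK_PACKED_FIELDS_N() helpers that follow):

/* Hypothetical 2-byte descriptor: VID in bits 15..4, PRIO in bits 3..1. */
struct example_desc {
	u16 vid;
	u8 prio;
};

static const struct packed_field_u8 example_desc_fields[] = {
	PACKED_FIELD(15, 4, struct example_desc, vid),
	PACKED_FIELD(3, 1, struct example_desc, prio),
};

static void example_desc_pack(void *pbuf, const struct example_desc *desc)
{
	/* Compile-time checks on field ordering, storage sizes and overlap. */
	CHECK_PACKED_FIELDS(example_desc_fields);

	pack_fields_u8(pbuf, 2, desc, example_desc_fields,
		       ARRAY_SIZE(example_desc_fields), QUIRK_LITTLE_ENDIAN);
}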
+ */ + +#define CHECK_PACKED_FIELDS_1(fields) \ + CHECK_PACKED_FIELD(fields, 0) + +#define CHECK_PACKED_FIELDS_2(fields) do { \ + CHECK_PACKED_FIELDS_1(fields); \ + CHECK_PACKED_FIELD(fields, 1); \ +} while (0) + +#define CHECK_PACKED_FIELDS_3(fields) do { \ + CHECK_PACKED_FIELDS_2(fields); \ + CHECK_PACKED_FIELD(fields, 2); \ +} while (0) + +#define CHECK_PACKED_FIELDS_4(fields) do { \ + CHECK_PACKED_FIELDS_3(fields); \ + CHECK_PACKED_FIELD(fields, 3); \ +} while (0) + +#define CHECK_PACKED_FIELDS_5(fields) do { \ + CHECK_PACKED_FIELDS_4(fields); \ + CHECK_PACKED_FIELD(fields, 4); \ +} while (0) + +#define CHECK_PACKED_FIELDS_6(fields) do { \ + CHECK_PACKED_FIELDS_5(fields); \ + CHECK_PACKED_FIELD(fields, 5); \ +} while (0) + +#define CHECK_PACKED_FIELDS_7(fields) do { \ + CHECK_PACKED_FIELDS_6(fields); \ + CHECK_PACKED_FIELD(fields, 6); \ +} while (0) + +#define CHECK_PACKED_FIELDS_8(fields) do { \ + CHECK_PACKED_FIELDS_7(fields); \ + CHECK_PACKED_FIELD(fields, 7); \ +} while (0) + +#define CHECK_PACKED_FIELDS_9(fields) do { \ + CHECK_PACKED_FIELDS_8(fields); \ + CHECK_PACKED_FIELD(fields, 8); \ +} while (0) + +#define CHECK_PACKED_FIELDS_10(fields) do { \ + CHECK_PACKED_FIELDS_9(fields); \ + CHECK_PACKED_FIELD(fields, 9); \ +} while (0) + +#define CHECK_PACKED_FIELDS_11(fields) do { \ + CHECK_PACKED_FIELDS_10(fields); \ + CHECK_PACKED_FIELD(fields, 10); \ +} while (0) + +#define CHECK_PACKED_FIELDS_12(fields) do { \ + CHECK_PACKED_FIELDS_11(fields); \ + CHECK_PACKED_FIELD(fields, 11); \ +} while (0) + +#define CHECK_PACKED_FIELDS_13(fields) do { \ + CHECK_PACKED_FIELDS_12(fields); \ + CHECK_PACKED_FIELD(fields, 12); \ +} while (0) + +#define CHECK_PACKED_FIELDS_14(fields) do { \ + CHECK_PACKED_FIELDS_13(fields); \ + CHECK_PACKED_FIELD(fields, 13); \ +} while (0) + +#define CHECK_PACKED_FIELDS_15(fields) do { \ + CHECK_PACKED_FIELDS_14(fields); \ + CHECK_PACKED_FIELD(fields, 14); \ +} while (0) + +#define CHECK_PACKED_FIELDS_16(fields) do { \ + CHECK_PACKED_FIELDS_15(fields); \ + CHECK_PACKED_FIELD(fields, 15); \ +} while (0) + +#define CHECK_PACKED_FIELDS_17(fields) do { \ + CHECK_PACKED_FIELDS_16(fields); \ + CHECK_PACKED_FIELD(fields, 16); \ +} while (0) + +#define CHECK_PACKED_FIELDS_18(fields) do { \ + CHECK_PACKED_FIELDS_17(fields); \ + CHECK_PACKED_FIELD(fields, 17); \ +} while (0) + +#define CHECK_PACKED_FIELDS_19(fields) do { \ + CHECK_PACKED_FIELDS_18(fields); \ + CHECK_PACKED_FIELD(fields, 18); \ +} while (0) + +#define CHECK_PACKED_FIELDS_20(fields) do { \ + CHECK_PACKED_FIELDS_19(fields); \ + CHECK_PACKED_FIELD(fields, 19); \ +} while (0) + +#define CHECK_PACKED_FIELDS_21(fields) do { \ + CHECK_PACKED_FIELDS_20(fields); \ + CHECK_PACKED_FIELD(fields, 20); \ +} while (0) + +#define CHECK_PACKED_FIELDS_22(fields) do { \ + CHECK_PACKED_FIELDS_21(fields); \ + CHECK_PACKED_FIELD(fields, 21); \ +} while (0) + +#define CHECK_PACKED_FIELDS_23(fields) do { \ + CHECK_PACKED_FIELDS_22(fields); \ + CHECK_PACKED_FIELD(fields, 22); \ +} while (0) + +#define CHECK_PACKED_FIELDS_24(fields) do { \ + CHECK_PACKED_FIELDS_23(fields); \ + CHECK_PACKED_FIELD(fields, 23); \ +} while (0) + +#define CHECK_PACKED_FIELDS_25(fields) do { \ + CHECK_PACKED_FIELDS_24(fields); \ + CHECK_PACKED_FIELD(fields, 24); \ +} while (0) + +#define CHECK_PACKED_FIELDS_26(fields) do { \ + CHECK_PACKED_FIELDS_25(fields); \ + CHECK_PACKED_FIELD(fields, 25); \ +} while (0) + +#define CHECK_PACKED_FIELDS_27(fields) do { \ + CHECK_PACKED_FIELDS_26(fields); \ + CHECK_PACKED_FIELD(fields, 26); \ +} while (0) + +#define 
CHECK_PACKED_FIELDS_28(fields) do { \ + CHECK_PACKED_FIELDS_27(fields); \ + CHECK_PACKED_FIELD(fields, 27); \ +} while (0) + +#define CHECK_PACKED_FIELDS_29(fields) do { \ + CHECK_PACKED_FIELDS_28(fields); \ + CHECK_PACKED_FIELD(fields, 28); \ +} while (0) + +#define CHECK_PACKED_FIELDS_30(fields) do { \ + CHECK_PACKED_FIELDS_29(fields); \ + CHECK_PACKED_FIELD(fields, 29); \ +} while (0) + +#define CHECK_PACKED_FIELDS_31(fields) do { \ + CHECK_PACKED_FIELDS_30(fields); \ + CHECK_PACKED_FIELD(fields, 30); \ +} while (0) + +#define CHECK_PACKED_FIELDS_32(fields) do { \ + CHECK_PACKED_FIELDS_31(fields); \ + CHECK_PACKED_FIELD(fields, 31); \ +} while (0) + +#define CHECK_PACKED_FIELDS_33(fields) do { \ + CHECK_PACKED_FIELDS_32(fields); \ + CHECK_PACKED_FIELD(fields, 32); \ +} while (0) + +#define CHECK_PACKED_FIELDS_34(fields) do { \ + CHECK_PACKED_FIELDS_33(fields); \ + CHECK_PACKED_FIELD(fields, 33); \ +} while (0) + +#define CHECK_PACKED_FIELDS_35(fields) do { \ + CHECK_PACKED_FIELDS_34(fields); \ + CHECK_PACKED_FIELD(fields, 34); \ +} while (0) + +#define CHECK_PACKED_FIELDS_36(fields) do { \ + CHECK_PACKED_FIELDS_35(fields); \ + CHECK_PACKED_FIELD(fields, 35); \ +} while (0) + +#define CHECK_PACKED_FIELDS_37(fields) do { \ + CHECK_PACKED_FIELDS_36(fields); \ + CHECK_PACKED_FIELD(fields, 36); \ +} while (0) + +#define CHECK_PACKED_FIELDS_38(fields) do { \ + CHECK_PACKED_FIELDS_37(fields); \ + CHECK_PACKED_FIELD(fields, 37); \ +} while (0) + +#define CHECK_PACKED_FIELDS_39(fields) do { \ + CHECK_PACKED_FIELDS_38(fields); \ + CHECK_PACKED_FIELD(fields, 38); \ +} while (0) + +#define CHECK_PACKED_FIELDS_40(fields) do { \ + CHECK_PACKED_FIELDS_39(fields); \ + CHECK_PACKED_FIELD(fields, 39); \ +} while (0) + +#define CHECK_PACKED_FIELDS_41(fields) do { \ + CHECK_PACKED_FIELDS_40(fields); \ + CHECK_PACKED_FIELD(fields, 40); \ +} while (0) + +#define CHECK_PACKED_FIELDS_42(fields) do { \ + CHECK_PACKED_FIELDS_41(fields); \ + CHECK_PACKED_FIELD(fields, 41); \ +} while (0) + +#define CHECK_PACKED_FIELDS_43(fields) do { \ + CHECK_PACKED_FIELDS_42(fields); \ + CHECK_PACKED_FIELD(fields, 42); \ +} while (0) + +#define CHECK_PACKED_FIELDS_44(fields) do { \ + CHECK_PACKED_FIELDS_43(fields); \ + CHECK_PACKED_FIELD(fields, 43); \ +} while (0) + +#define CHECK_PACKED_FIELDS_45(fields) do { \ + CHECK_PACKED_FIELDS_44(fields); \ + CHECK_PACKED_FIELD(fields, 44); \ +} while (0) + +#define CHECK_PACKED_FIELDS_46(fields) do { \ + CHECK_PACKED_FIELDS_45(fields); \ + CHECK_PACKED_FIELD(fields, 45); \ +} while (0) + +#define CHECK_PACKED_FIELDS_47(fields) do { \ + CHECK_PACKED_FIELDS_46(fields); \ + CHECK_PACKED_FIELD(fields, 46); \ +} while (0) + +#define CHECK_PACKED_FIELDS_48(fields) do { \ + CHECK_PACKED_FIELDS_47(fields); \ + CHECK_PACKED_FIELD(fields, 47); \ +} while (0) + +#define CHECK_PACKED_FIELDS_49(fields) do { \ + CHECK_PACKED_FIELDS_48(fields); \ + CHECK_PACKED_FIELD(fields, 48); \ +} while (0) + +#define CHECK_PACKED_FIELDS_50(fields) do { \ + CHECK_PACKED_FIELDS_49(fields); \ + CHECK_PACKED_FIELD(fields, 49); \ +} while (0) + +#define CHECK_PACKED_FIELDS(fields) \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 1, ({ CHECK_PACKED_FIELDS_1(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 2, ({ CHECK_PACKED_FIELDS_2(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 3, ({ CHECK_PACKED_FIELDS_3(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 4, ({ CHECK_PACKED_FIELDS_4(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 5, ({ CHECK_PACKED_FIELDS_5(fields); }), 
\ + __builtin_choose_expr(ARRAY_SIZE(fields) == 6, ({ CHECK_PACKED_FIELDS_6(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 7, ({ CHECK_PACKED_FIELDS_7(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 8, ({ CHECK_PACKED_FIELDS_8(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 9, ({ CHECK_PACKED_FIELDS_9(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 10, ({ CHECK_PACKED_FIELDS_10(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 11, ({ CHECK_PACKED_FIELDS_11(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 12, ({ CHECK_PACKED_FIELDS_12(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 13, ({ CHECK_PACKED_FIELDS_13(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 14, ({ CHECK_PACKED_FIELDS_14(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 15, ({ CHECK_PACKED_FIELDS_15(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 16, ({ CHECK_PACKED_FIELDS_16(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 17, ({ CHECK_PACKED_FIELDS_17(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 18, ({ CHECK_PACKED_FIELDS_18(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 19, ({ CHECK_PACKED_FIELDS_19(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 20, ({ CHECK_PACKED_FIELDS_20(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 21, ({ CHECK_PACKED_FIELDS_21(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 22, ({ CHECK_PACKED_FIELDS_22(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 23, ({ CHECK_PACKED_FIELDS_23(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 24, ({ CHECK_PACKED_FIELDS_24(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 25, ({ CHECK_PACKED_FIELDS_25(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 26, ({ CHECK_PACKED_FIELDS_26(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 27, ({ CHECK_PACKED_FIELDS_27(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 28, ({ CHECK_PACKED_FIELDS_28(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 29, ({ CHECK_PACKED_FIELDS_29(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 30, ({ CHECK_PACKED_FIELDS_30(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 31, ({ CHECK_PACKED_FIELDS_31(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 32, ({ CHECK_PACKED_FIELDS_32(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 33, ({ CHECK_PACKED_FIELDS_33(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 34, ({ CHECK_PACKED_FIELDS_34(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 35, ({ CHECK_PACKED_FIELDS_35(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 36, ({ CHECK_PACKED_FIELDS_36(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 37, ({ CHECK_PACKED_FIELDS_37(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 38, ({ CHECK_PACKED_FIELDS_38(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 39, ({ CHECK_PACKED_FIELDS_39(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 40, ({ CHECK_PACKED_FIELDS_40(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 41, ({ CHECK_PACKED_FIELDS_41(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 42, ({ CHECK_PACKED_FIELDS_42(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 43, ({ CHECK_PACKED_FIELDS_43(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 44, ({ CHECK_PACKED_FIELDS_44(fields); }), \ + 
__builtin_choose_expr(ARRAY_SIZE(fields) == 45, ({ CHECK_PACKED_FIELDS_45(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 46, ({ CHECK_PACKED_FIELDS_46(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 47, ({ CHECK_PACKED_FIELDS_47(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 48, ({ CHECK_PACKED_FIELDS_48(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 49, ({ CHECK_PACKED_FIELDS_49(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 50, ({ CHECK_PACKED_FIELDS_50(fields); }), \ + ({ BUILD_BUG_ON_MSG(1, "CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than 50."); }) \ +)))))))))))))))))))))))))))))))))))))))))))))))))) + +/* End of generated content */ + +#define pack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : pack_fields_u8, \ + const struct packed_field_u16 * : pack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + +#define unpack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : unpack_fields_u8, \ + const struct packed_field_u16 * : unpack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 691506bdf2c5..3b814ce08331 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -110,6 +110,7 @@ enum pageflags { PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ + PG_dropbehind, /* drop pages on IO completion */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif @@ -225,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page } return page; } + +static __always_inline bool page_count_writable(const struct page *page, int u) +{ + if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) + return true; + + /* + * The refcount check is ordered before the fake-head check to prevent + * the following race: + * CPU 1 (HVO) CPU 2 (speculative PFN walker) + * + * page_ref_freeze() + * synchronize_rcu() + * rcu_read_lock() + * page_is_fake_head() is false + * vmemmap_remap_pte() + * XXX: struct page[] becomes r/o + * + * page_ref_unfreeze() + * page_ref_count() is not zero + * + * atomic_add_unless(&page->_refcount) + * XXX: try to modify r/o struct page[] + * + * The refcount check also prevents modification attempts to other (r/o) + * tail pages that are not fake heads. 
+ */ + if (atomic_read_acquire(&page->_refcount) == u) + return false; + + return page_fixed_fake_head(page) == page; +} #else static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } + +static inline bool page_count_writable(const struct page *page, int u) +{ + return true; +} #endif static __always_inline int page_is_fake_head(const struct page *page) @@ -562,6 +600,10 @@ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) +FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(dropbehind, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(dropbehind, FOLIO_HEAD_PAGE) + #ifdef CONFIG_HIGHMEM /* * Must use a macro here due to header dependency issues. page_zone() is not @@ -573,6 +615,13 @@ FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { @@ -668,12 +717,6 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) -/* - * Different with flags above, this flag is used only for fsdax mode. It - * indicates that this page->mapping is now under reflink case. - */ -#define PAGE_MAPPING_DAX_SHARED ((void *)0x1) - static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; @@ -894,21 +937,9 @@ static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } - -/* - * PageTransTail returns true for both transparent huge pages - * and hugetlbfs pages, so it should only be called when it's known - * that hugetlbfs pages aren't involved. - */ -static inline int PageTransTail(const struct page *page) -{ - return PageTail(page); -} #else TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) -TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) -TESTPAGEFLAG_FALSE(TransTail, transtail) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) @@ -918,11 +949,9 @@ TESTPAGEFLAG_FALSE(TransTail, transtail) * * This flag is set by hwpoison handler. Cleared by THP split or free page. */ -PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) - TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +FOLIO_FLAG(has_hwpoisoned, FOLIO_SECOND_PAGE) #else -PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) - TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) +FOLIO_FLAG_FALSE(has_hwpoisoned) #endif /* @@ -934,14 +963,15 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) enum pagetype { /* 0x00-0x7f are positive numbers, ie mapcount */ /* Reserve 0x80-0xef for mapcount overflow. 
*/ - PGTY_buddy = 0xf0, - PGTY_offline = 0xf1, - PGTY_table = 0xf2, - PGTY_guard = 0xf3, - PGTY_hugetlb = 0xf4, - PGTY_slab = 0xf5, - PGTY_zsmalloc = 0xf6, - PGTY_unaccepted = 0xf7, + PGTY_buddy = 0xf0, + PGTY_offline = 0xf1, + PGTY_table = 0xf2, + PGTY_guard = 0xf3, + PGTY_hugetlb = 0xf4, + PGTY_slab = 0xf5, + PGTY_zsmalloc = 0xf6, + PGTY_unaccepted = 0xf7, + PGTY_large_kmalloc = 0xf8, PGTY_mapcount_underflow = 0xff }; @@ -1084,6 +1114,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) * Serialized with zone lock. */ PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) +FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc) /** * PageHuge - Determine if the page belongs to hugetlbfs @@ -1113,6 +1144,12 @@ static inline bool is_page_hwpoison(const struct page *page) return folio_test_hugetlb(folio) && PageHWPoison(&folio->page); } +static inline bool folio_contain_hwpoisoned_page(struct folio *folio) +{ + return folio_test_hwpoison(folio) || + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio)); +} + bool is_free_buddy_page(const struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); @@ -1200,6 +1237,10 @@ static inline int folio_has_private(const struct folio *folio) return !!(folio->flags & PAGE_FLAGS_PRIVATE); } +static inline bool folio_test_large_maybe_mapped_shared(const struct folio *folio) +{ + return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); +} #undef PF_ANY #undef PF_HEAD #undef PF_NO_TAIL diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 73dc2c1841ec..898bb788243b 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -31,7 +31,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page, int migratetype); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int flags, gfp_t gfp_flags); + int migratetype, int flags); void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int migratetype); diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 79dbd8bc35a7..d649b6bbbc87 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -9,10 +9,12 @@ struct page_counter { /* - * Make sure 'usage' does not share cacheline with any other field. The - * memcg->memory.usage is a hot member of struct mem_cgroup. + * Make sure 'usage' does not share cacheline with any other field in + * v2. The memcg->memory.usage is a hot member of struct mem_cgroup. */ atomic_long_t usage; + unsigned long failcnt; /* v1-only field */ + CACHELINE_PADDING(_pad1_); /* effective memory.min and memory.min usage tracking */ @@ -28,12 +30,12 @@ struct page_counter { unsigned long watermark; /* Latest cg2 reset watermark */ unsigned long local_watermark; - unsigned long failcnt; /* Keep all the read most fields in a separete cacheline. 
*/ CACHELINE_PADDING(_pad2_); bool protection_support; + bool track_failcnt; unsigned long min; unsigned long low; unsigned long high; @@ -58,6 +60,7 @@ static inline void page_counter_init(struct page_counter *counter, counter->max = PAGE_COUNTER_MAX; counter->parent = parent; counter->protection_support = protection_support; + counter->track_failcnt = false; } static inline unsigned long page_counter_read(struct page_counter *counter) @@ -96,7 +99,7 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) counter->watermark = usage; } -#ifdef CONFIG_MEMCG +#if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM) void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index e4b48a0dda24..76c817162d2f 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -3,6 +3,7 @@ #define __LINUX_PAGE_EXT_H #include <linux/types.h> +#include <linux/mmzone.h> #include <linux/stacktrace.h> struct pglist_data; @@ -69,16 +70,31 @@ extern void page_ext_init(void); static inline void page_ext_init_flatmem_late(void) { } + +static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn) +{ + /* + * page_ext is allocated per memory section. Once we cross a + * memory section, we have to fetch the new pointer. + */ + return next_pfn % PAGES_PER_SECTION; +} #else extern void page_ext_init_flatmem(void); extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } + +static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn) +{ + return true; +} #endif extern struct page_ext *page_ext_get(const struct page *page); extern void page_ext_put(struct page_ext *page_ext); +extern struct page_ext *page_ext_lookup(unsigned long pfn); static inline void *page_ext_data(struct page_ext *page_ext, struct page_ext_operations *ops) @@ -93,6 +109,83 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr) return next; } +struct page_ext_iter { + unsigned long index; + unsigned long start_pfn; + struct page_ext *page_ext; +}; + +/** + * page_ext_iter_begin() - Prepare for iterating through page extensions. + * @iter: page extension iterator. + * @pfn: PFN of the page we're interested in. + * + * Must be called with RCU read lock taken. + * + * Return: NULL if no page_ext exists for this page. + */ +static inline struct page_ext *page_ext_iter_begin(struct page_ext_iter *iter, + unsigned long pfn) +{ + iter->index = 0; + iter->start_pfn = pfn; + iter->page_ext = page_ext_lookup(pfn); + + return iter->page_ext; +} + +/** + * page_ext_iter_next() - Get next page extension + * @iter: page extension iterator. + * + * Must be called with RCU read lock taken. + * + * Return: NULL if no next page_ext exists. + */ +static inline struct page_ext *page_ext_iter_next(struct page_ext_iter *iter) +{ + unsigned long pfn; + + if (WARN_ON_ONCE(!iter->page_ext)) + return NULL; + + iter->index++; + pfn = iter->start_pfn + iter->index; + + if (page_ext_iter_next_fast_possible(pfn)) + iter->page_ext = page_ext_next(iter->page_ext); + else + iter->page_ext = page_ext_lookup(pfn); + + return iter->page_ext; +} + +/** + * page_ext_iter_get() - Get current page extension + * @iter: page extension iterator. + * + * Return: NULL if no page_ext exists for this iterator. 
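+ *
+ * A minimal usage sketch of the iterator, via for_each_page_ext() defined
+ * below (nr_pages, the page_ext_operations client my_ops and the
+ * update_my_data() call are illustrative):
+ *
+ *	struct page_ext_iter iter;
+ *	struct page_ext *page_ext;
+ *
+ *	rcu_read_lock();
+ *	for_each_page_ext(page, nr_pages, page_ext, iter) {
+ *		void *data = page_ext_data(page_ext, &my_ops);
+ *
+ *		update_my_data(data);
+ *	}
+ *	rcu_read_unlock();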
+ */ +static inline struct page_ext *page_ext_iter_get(const struct page_ext_iter *iter) +{ + return iter->page_ext; +} + +/** + * for_each_page_ext(): iterate through page_ext objects. + * @__page: the page we're interested in + * @__pgcount: how many pages to iterate through + * @__page_ext: struct page_ext pointer where the current page_ext + * object is returned + * @__iter: struct page_ext_iter object (defined in the stack) + * + * IMPORTANT: must be called with RCU read lock taken. + */ +#define for_each_page_ext(__page, __pgcount, __page_ext, __iter) \ + for (__page_ext = page_ext_iter_begin(&__iter, page_to_pfn(__page));\ + __page_ext && __iter.index < __pgcount; \ + __page_ext = page_ext_iter_next(&__iter)) + #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 8c236c651d1d..544150d1d5fd 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u) rcu_read_lock(); /* avoid writing to the vmemmap area being remapped */ - if (!page_is_fake_head(page) && page_ref_count(page) != u) + if (page_count_writable(page, u)) ret = atomic_add_unless(&page->_refcount, nr, u); rcu_read_unlock(); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bcf0865a38ae..26baa78f1ca7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -536,26 +536,6 @@ struct address_space *folio_mapping(struct folio *); struct address_space *swapcache_mapping(struct folio *); /** - * folio_file_mapping - Find the mapping this folio belongs to. - * @folio: The folio. - * - * For folios which are in the page cache, return the mapping that this - * page belongs to. Folios in the swap cache return the mapping of the - * swap file or swap device where the data is stored. This is different - * from the mapping returned by folio_mapping(). The only reason to - * use it is if, like NFS, you return 0 from ->activate_swapfile. - * - * Do not call this for folios which aren't in the page cache or swap cache. - */ -static inline struct address_space *folio_file_mapping(struct folio *folio) -{ - if (unlikely(folio_test_swapcache(folio))) - return swapcache_mapping(folio); - - return folio->mapping; -} - -/** * folio_flush_mapping - Find the file mapping this folio belongs to. * @folio: The folio. * @@ -575,11 +555,6 @@ static inline struct address_space *folio_flush_mapping(struct folio *folio) return folio_mapping(folio); } -static inline struct address_space *page_file_mapping(struct page *page) -{ - return folio_file_mapping(page_folio(page)); -} - /** * folio_inode - Get the host inode for this folio. * @folio: The folio. @@ -710,6 +685,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, * * %FGP_NOFS - __GFP_FS will get cleared in gfp. * * %FGP_NOWAIT - Don't block on the folio lock. * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) + * * %FGP_DONTCACHE - Uncached buffered IO * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() * implementation. 
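 *
 * A sketch of typical use together with __filemap_get_folio(), as done by
 * write_begin() implementations (mapping, index and len are illustrative
 * locals):
 *
 *	folio = __filemap_get_folio(mapping, index,
 *			FGP_WRITEBEGIN | fgf_set_order(len),
 *			mapping_gfp_mask(mapping));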
*/ @@ -723,10 +699,21 @@ typedef unsigned int __bitwise fgf_t; #define FGP_NOWAIT ((__force fgf_t)0x00000020) #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) #define FGP_STABLE ((__force fgf_t)0x00000080) +#define FGP_DONTCACHE ((__force fgf_t)0x00000100) #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) +static inline unsigned int filemap_get_order(size_t size) +{ + unsigned int shift = ilog2(size); + + if (shift <= PAGE_SHIFT) + return 0; + + return shift - PAGE_SHIFT; +} + /** * fgf_set_order - Encode a length in the fgf_t flags. * @size: The suggested size of the folio to create. @@ -740,11 +727,11 @@ typedef unsigned int __bitwise fgf_t; */ static inline fgf_t fgf_set_order(size_t size) { - unsigned int shift = ilog2(size); + unsigned int order = filemap_get_order(size); - if (shift <= PAGE_SHIFT) + if (!order) return 0; - return (__force fgf_t)((shift - PAGE_SHIFT) << 26); + return (__force fgf_t)(order << 26); } void *filemap_get_entry(struct address_space *mapping, pgoff_t index); @@ -978,9 +965,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); -struct page *grab_cache_page_write_begin(struct address_space *mapping, - pgoff_t index); - /* * Returns locked page at given index in given cache, creating it if needed. */ @@ -1032,21 +1016,23 @@ static inline pgoff_t page_pgoff(const struct folio *folio, return folio->index + folio_page_idx(folio, page); } -/* - * Return byte-offset into filesystem object for page. +/** + * folio_pos - Returns the byte position of this folio in its file. + * @folio: The folio. */ -static inline loff_t page_offset(struct page *page) +static inline loff_t folio_pos(const struct folio *folio) { - return ((loff_t)page->index) << PAGE_SHIFT; + return ((loff_t)folio->index) * PAGE_SIZE; } -/** - * folio_pos - Returns the byte position of this folio in its file. - * @folio: The folio. +/* + * Return byte-offset into filesystem object for page. */ -static inline loff_t folio_pos(struct folio *folio) +static inline loff_t page_offset(struct page *page) { - return page_offset(&folio->page); + struct folio *folio = page_folio(page); + + return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE; } /* @@ -1233,18 +1219,12 @@ static inline int folio_wait_locked_killable(struct folio *folio) return folio_wait_bit_killable(folio, PG_locked); } -static inline void wait_on_page_locked(struct page *page) -{ - folio_wait_locked(page_folio(page)); -} - void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); -void wait_for_stable_page(struct page *page); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); @@ -1271,11 +1251,6 @@ void folio_wait_private_2(struct folio *folio); int folio_wait_private_2_killable(struct folio *folio); /* - * Add an arbitrary waiter to a page's wait queue - */ -void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); - -/* * Fault in userspace address range. 
*/ size_t fault_in_writeable(char __user *uaddr, size_t size); @@ -1353,6 +1328,7 @@ struct readahead_control { pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; + bool dropbehind; bool _workingset; unsigned long _pflags; }; @@ -1595,34 +1571,6 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, } /** - * page_mkwrite_check_truncate - check if page was truncated - * @page: the page to check - * @inode: the inode to check the page against - * - * Returns the number of bytes in the page up to EOF, - * or -EFAULT if the page was truncated. - */ -static inline int page_mkwrite_check_truncate(struct page *page, - struct inode *inode) -{ - loff_t size = i_size_read(inode); - pgoff_t index = size >> PAGE_SHIFT; - int offset = offset_in_page(size); - - if (page->mapping != inode->i_mapping) - return -EFAULT; - - /* page is wholly inside EOF */ - if (page->index < index) - return PAGE_SIZE; - /* page is wholly past EOF */ - if (page->index > index || !offset) - return -EFAULT; - /* page is partially inside EOF */ - return offset; -} - -/** * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. * @folio: The folio. diff --git a/include/linux/panic.h b/include/linux/panic.h index 54d90b6c5f47..2494d51707ef 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -74,7 +74,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 #define TAINT_TEST 18 -#define TAINT_FLAGS_COUNT 19 +#define TAINT_FWCTL 19 +#define TAINT_FLAGS_COUNT 20 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index ac8c44dd8237..c5e9cac0575e 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -33,7 +33,7 @@ struct disk_stats { #define part_stat_read(part, field) \ ({ \ - typeof((part)->bd_stats->field) res = 0; \ + TYPEOF_UNQUAL((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 0e8b74e63767..75c6c86cf09d 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_pasid_status(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) { return -EINVAL; } @@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } +static inline int pci_pasid_status(struct pci_dev *pdev) +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ #endif /* LINUX_PCI_ATS_H */ diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 3a4860bd2758..3a10f8cfc3ad 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -45,6 +45,10 @@ struct pci_ecam_ops { unsigned int bus_shift; struct pci_ops pci_ops; int (*init)(struct pci_config_window *); + int (*enable_device)(struct pci_host_bridge *, + struct pci_dev *); + void (*disable_device)(struct pci_host_bridge *, + struct pci_dev *); }; /* diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index e818e3fdcded..82837008b56f 100644 --- 
a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -188,11 +188,15 @@ struct pci_epc { * enum pci_epc_bar_type - configurability of endpoint BAR * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. * @BAR_FIXED: The BAR mask is fixed by the hardware. + * @BAR_RESIZABLE: The BAR implements the PCI-SIG Resizable BAR Capability. + * NOTE: An EPC driver can currently only set a single supported + * size. * @BAR_RESERVED: The BAR should not be touched by an EPF driver. */ enum pci_epc_bar_type { BAR_PROGRAMMABLE = 0, BAR_FIXED, + BAR_RESIZABLE, BAR_RESERVED, }; @@ -221,6 +225,7 @@ struct pci_epc_bar_desc { * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up * @msi_capable: indicate if the endpoint function has MSI capability * @msix_capable: indicate if the endpoint function has MSI-X capability + * @intx_capable: indicate if the endpoint can raise INTx interrupts * @bar: array specifying the hardware description for each BAR * @align: alignment size required for BAR buffer allocation */ @@ -228,6 +233,7 @@ struct pci_epc_features { unsigned int linkup_notifier : 1; unsigned int msi_capable : 1; unsigned int msix_capable : 1; + unsigned int intx_capable : 1; struct pci_epc_bar_desc bar[PCI_STD_NUM_BARS]; size_t align; }; @@ -257,7 +263,6 @@ __devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct pci_epc * __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct module *owner); -void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc); void pci_epc_destroy(struct pci_epc *epc); int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); @@ -271,6 +276,7 @@ void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_header *hdr); +int pci_epc_bar_size_to_rebar_cap(size_t size, u32 *cap); int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 18a3aeb62ae4..749cee0bcf2c 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -38,7 +38,7 @@ enum pci_barno { * @baseclass_code: broadly classifies the type of function the device performs * @cache_line_size: specifies the system cacheline size in units of DWORDs * @subsys_vendor_id: vendor of the add-in card or subsystem - * @subsys_id: id specific to vendor + * @subsys_id: ID specific to vendor * @interrupt_pin: interrupt pin the device (or device function) uses */ struct pci_epf_header { @@ -59,7 +59,7 @@ struct pci_epf_header { * @bind: ops to perform when a EPC device has been bound to EPF device * @unbind: ops to perform when a binding has been lost between a EPC device * and EPF device - * @add_cfs: ops to initialize function specific configfs attributes + * @add_cfs: ops to initialize function-specific configfs attributes */ struct pci_epf_ops { int (*bind)(struct pci_epf *epf); @@ -114,6 +114,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR + * @aligned_size: the size actually allocated to accommodate the iATU alignment + * requirement * @barno: BAR number * @flags: flags that are set for the BAR */ @@ -121,6 +123,7 @@ struct pci_epf_bar { 
dma_addr_t phys_addr; void *addr; size_t size; + size_t aligned_size; enum pci_barno barno; int flags; }; @@ -138,7 +141,7 @@ struct pci_epf_bar { * @epc: the EPC device to which this EPF device is bound * @epf_pf: the physical EPF device to which this virtual EPF device is bound * @driver: the EPF driver to which this EPF device is bound - * @id: Pointer to the EPF device ID + * @id: pointer to the EPF device ID * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc * @lock: mutex to protect pci_epf_ops * @sec_epc: the secondary EPC device to which this EPF device is bound @@ -151,13 +154,13 @@ struct pci_epf_bar { * @is_vf: true - virtual function, false - physical function * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function - * @event_ops: Callbacks for capturing the EPC events + * @event_ops: callbacks for capturing the EPC events */ struct pci_epf { struct device dev; const char *name; struct pci_epf_header *header; - struct pci_epf_bar bar[6]; + struct pci_epf_bar bar[PCI_STD_NUM_BARS]; u8 msi_interrupts; u16 msix_interrupts; u8 func_no; @@ -174,7 +177,7 @@ struct pci_epf { /* Below members are to attach secondary EPC to an endpoint function */ struct pci_epc *sec_epc; struct list_head sec_epc_list; - struct pci_epf_bar sec_epc_bar[6]; + struct pci_epf_bar sec_epc_bar[PCI_STD_NUM_BARS]; u8 sec_epc_func_no; struct config_group *group; unsigned int is_bound; @@ -185,11 +188,12 @@ struct pci_epf { }; /** - * struct pci_epf_msix_tbl - represents the MSIX table entry structure - * @msg_addr: Writes to this address will trigger MSIX interrupt in host - * @msg_data: Data that should be written to @msg_addr to trigger MSIX interrupt + * struct pci_epf_msix_tbl - represents the MSI-X table entry structure + * @msg_addr: Writes to this address will trigger MSI-X interrupt in host + * @msg_data: Data that should be written to @msg_addr to trigger MSI-X + * interrupt * @vector_ctrl: Identifies if the function is prohibited from sending a message - * using this MSIX table entry + * using this MSI-X table entry */ struct pci_epf_msix_tbl { u64 msg_addr; diff --git a/include/linux/pci.h b/include/linux/pci.h index f05903dd7695..081e5c0a3ddf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Device requires write to PCI_MSIX_ENTRY_DATA before any MSIX reads */ + PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { @@ -353,6 +355,7 @@ struct pci_dev { struct pci_dev *rcec; /* Associated RCEC device */ #endif u32 devcap; /* PCIe Device Capabilities */ + u16 rebar_cap; /* Resizable BAR capability offset */ u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ @@ -407,7 +410,7 @@ struct pci_dev { supported from root to here */ #endif unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */ - unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ + unsigned int eetlp_prefix_max:3; /* Max # of End-End TLP Prefixes, 0=not supported */ pci_channel_state_t error_state; /* Current connectivity state */ struct device dev; /* Generic device interface */ @@ -476,6 +479,7 @@ struct pci_dev { unsigned int 
no_command_memory:1; /* No PCI_COMMAND_MEMORY */ unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? */ + unsigned int non_mappable_bars:1; /* BARs can't be mapped to user-space */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -595,6 +599,8 @@ struct pci_host_bridge { u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */ int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); + int (*enable_device)(struct pci_host_bridge *bridge, struct pci_dev *dev); + void (*disable_device)(struct pci_host_bridge *bridge, struct pci_dev *dev); void *release_data; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ @@ -679,6 +685,7 @@ struct pci_bus { struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; unsigned int unsafe_warn:1; /* warned about RW1C config write */ + unsigned int flit_mode:1; /* Link in Flit mode */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) @@ -1047,6 +1054,20 @@ struct pci_driver { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 /** + * PCI_VDEVICE_SUB - describe a specific PCI device/subdevice in a short form + * @vend: the vendor name + * @dev: the 16 bit PCI Device ID + * @subvend: the 16 bit PCI Subvendor ID + * @subdev: the 16 bit PCI Subdevice ID + * + * Generate the pci_device_id struct layout for the specific PCI + * device/subdevice. Private data may follow the output. + */ +#define PCI_VDEVICE_SUB(vend, dev, subvend, subdev) \ + .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ + .subvendor = (subvend), .subdevice = (subdev), 0, 0 + +/** * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form * @vend: the vendor name (without PCI_VENDOR_ID_ prefix) * @dev: the device name (without PCI_DEVICE_ID_<vend>_ prefix) @@ -1380,7 +1401,6 @@ void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); void pci_release_resource(struct pci_dev *dev, int resno); static inline int pci_rebar_bytes_to_size(u64 bytes) { @@ -1439,7 +1459,6 @@ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); /* Functions for PCI Hotplug drivers to use */ -unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge); unsigned int pci_rescan_bus(struct pci_bus *bus); void pci_lock_rescan_remove(void); void pci_unlock_rescan_remove(void); @@ -1461,7 +1480,6 @@ void pci_assign_unassigned_resources(void); void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); -int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type); int pci_enable_resources(struct pci_dev *, int mask); void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); @@ -1621,7 +1639,6 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -void 
pci_setup_bridge(struct pci_bus *bus); resource_size_t pcibios_window_alignment(struct pci_bus *bus, unsigned long type); @@ -1833,6 +1850,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else @@ -2320,8 +2345,6 @@ extern int pci_pci_problems; #define PCIPCI_ALIMAGIK 32 /* Need low latency setting */ #define PCIAGP_FAIL 64 /* No PCI to AGP DMA */ -extern unsigned long pci_cardbus_io_size; -extern unsigned long pci_cardbus_mem_size; extern u8 pci_dfl_cache_line_size; extern u8 pci_cache_line_size; @@ -2431,11 +2454,6 @@ static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int res static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif -#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -void pci_hp_create_module_link(struct pci_slot *pci_slot); -void pci_hp_remove_module_link(struct pci_slot *pci_slot); -#endif - /** * pci_pcie_cap - get the saved PCIe capability offset * @dev: PCI device diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 3a10d6ec3ee7..ec77ccf1fc4d 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -50,7 +50,6 @@ struct hotplug_slot_ops { /** * struct hotplug_slot - used to register a physical slot with the hotplug pci core * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot - * @slot_list: internal list used to track hotplug PCI slots * @pci_slot: represents a physical slot * @owner: The module owner of this structure * @mod_name: The module name (KBUILD_MODNAME) of this structure @@ -59,7 +58,6 @@ struct hotplug_slot { const struct hotplug_slot_ops *ops; /* Variables below this are for use only by the hotplug pci core. 
*/ - struct list_head slot_list; struct pci_slot *pci_slot; struct module *owner; const char *mod_name; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index de5deb1a0118..2e28182c3af0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -518,6 +518,7 @@ #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM 0x0251 #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252 +#define PCI_DEVICE_ID_IBM_ISM 0x04ed #define PCI_SUBVENDOR_ID_IBM 0x1014 #define PCI_SUBDEVICE_ID_IBM_SATURN_SERIAL_ONE_PORT 0x03d4 @@ -569,6 +570,7 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 +#define PCI_DEVICE_ID_AMD_17H_M40H_DF_F3 0x13f3 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3 0x1727 @@ -2609,6 +2611,8 @@ #define PCI_VENDOR_ID_ZHAOXIN 0x1d17 +#define PCI_VENDOR_ID_ROCKCHIP 0x1d87 + #define PCI_VENDOR_ID_HYGON 0x1d94 #define PCI_VENDOR_ID_META 0x1d9b @@ -3134,6 +3138,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7 +#define PCI_DEVICE_ID_INTEL_HDA_PTL_H 0xe328 #define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428 #define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 diff --git a/include/linux/pcie-dwc.h b/include/linux/pcie-dwc.h new file mode 100644 index 000000000000..8ff778e7aec0 --- /dev/null +++ b/include/linux/pcie-dwc.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021-2023 Alibaba Inc. + * Copyright (C) 2025 Linaro Ltd. + * + * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + */ + +#ifndef LINUX_PCIE_DWC_H +#define LINUX_PCIE_DWC_H + +#include <linux/pci_ids.h> + +struct dwc_pcie_vsec_id { + u16 vendor_id; + u16 vsec_id; + u8 vsec_rev; +}; + +/* + * VSEC IDs are allocated by the vendor, so a given ID may mean different + * things to different vendors. See PCIe r6.0, sec 7.9.5.2. 
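+ *
+ * A consumer sketch for the table below (pdev is an illustrative device
+ * pointer; matching on the VSEC revision is omitted for brevity):
+ *
+ *	for (i = 0; dwc_pcie_rasdes_vsec_ids[i].vendor_id; i++) {
+ *		pos = pci_find_vsec_capability(pdev,
+ *				dwc_pcie_rasdes_vsec_ids[i].vendor_id,
+ *				dwc_pcie_rasdes_vsec_ids[i].vsec_id);
+ *		if (pos)
+ *			break;
+ *	}
+ *
+ * The trailing empty entry terminates the walk.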
+ */ +static const struct dwc_pcie_vsec_id dwc_pcie_rasdes_vsec_ids[] = { + { .vendor_id = PCI_VENDOR_ID_ALIBABA, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_AMPERE, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_QCOM, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_ROCKCHIP, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_SAMSUNG, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + {} +}; + +#endif /* LINUX_PCIE_DWC_H */ diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index b5b5d17998b8..e40f554ff717 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -50,9 +50,7 @@ struct dw_xpcs; struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); -int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, - int enable); +void xpcs_config_eee_mult_fact(struct dw_xpcs *xpcs, u8 mult_fact); struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode); void xpcs_destroy(struct dw_xpcs *xpcs); diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 4b4e9a98b37b..ddd111f04ca0 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -1179,6 +1179,274 @@ struct pds_lm_host_vf_status_cmd { u8 status; }; +enum pds_fwctl_cmd_opcode { + PDS_FWCTL_CMD_IDENT = 70, + PDS_FWCTL_CMD_RPC = 71, + PDS_FWCTL_CMD_QUERY = 72, +}; + +/** + * struct pds_fwctl_cmd - Firmware control command structure + * @opcode: Opcode + * @rsvd: Reserved + * @ep: Endpoint identifier + * @op: Operation identifier + */ +struct pds_fwctl_cmd { + u8 opcode; + u8 rsvd[3]; + __le32 ep; + __le32 op; +} __packed; + +/** + * struct pds_fwctl_comp - Firmware control completion structure + * @status: Status of the firmware control operation + * @rsvd: Reserved + * @comp_index: Completion index in little-endian format + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 rsvd2[11]; + u8 color; +} __packed; + +/** + * struct pds_fwctl_ident_cmd - Firmware control identification command structure + * @opcode: Operation code for the command + * @rsvd: Reserved + * @version: Interface version + * @rsvd2: Reserved + * @len: Length of the identification data + * @ident_pa: Physical address of the identification data + */ +struct pds_fwctl_ident_cmd { + u8 opcode; + u8 rsvd; + u8 version; + u8 rsvd2; + __le32 len; + __le64 ident_pa; +} __packed; + +/* future feature bits here + * enum pds_fwctl_features { + * }; + * (compilers don't like empty enums) + */ + +/** + * struct pds_fwctl_ident - Firmware control identification structure + * @features: Supported features (enum pds_fwctl_features) + * @version: Interface version + * @rsvd: Reserved + * @max_req_sz: Maximum request size + * @max_resp_sz: Maximum response size + * @max_req_sg_elems: Maximum number of request SGs + * @max_resp_sg_elems: Maximum number of response SGs + */ +struct pds_fwctl_ident { + __le64 features; + u8 version; + u8 rsvd[3]; + __le32 max_req_sz; + __le32 max_resp_sz; + u8 max_req_sg_elems; + u8 max_resp_sg_elems; +} __packed; + +enum pds_fwctl_query_entity { + PDS_FWCTL_RPC_ROOT = 0, + PDS_FWCTL_RPC_ENDPOINT = 1, + PDS_FWCTL_RPC_OPERATION = 2, +}; + +#define 
PDS_FWCTL_RPC_OPCODE_CMD_SHIFT 0 +#define PDS_FWCTL_RPC_OPCODE_CMD_MASK GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT) +#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT 16 +#define PDS_FWCTL_RPC_OPCODE_VER_MASK GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT) + +#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_CMD_MASK, op) +#define PDS_FWCTL_RPC_OPCODE_GET_VER(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_VER_MASK, op) + +#define PDS_FWCTL_RPC_OPCODE_CMP(op1, op2) \ + (PDS_FWCTL_RPC_OPCODE_GET_CMD(op1) == PDS_FWCTL_RPC_OPCODE_GET_CMD(op2) && \ + PDS_FWCTL_RPC_OPCODE_GET_VER(op1) <= PDS_FWCTL_RPC_OPCODE_GET_VER(op2)) + +/* + * FW command attributes that map to the FWCTL scope values + */ +#define PDSFC_FW_CMD_ATTR_READ 0x00 +#define PDSFC_FW_CMD_ATTR_DEBUG_READ 0x02 +#define PDSFC_FW_CMD_ATTR_WRITE 0x04 +#define PDSFC_FW_CMD_ATTR_DEBUG_WRITE 0x08 +#define PDSFC_FW_CMD_ATTR_SYNC 0x10 + +/** + * struct pds_fwctl_query_cmd - Firmware control query command structure + * @opcode: Operation code for the command + * @entity: Entity type to query (enum pds_fwctl_query_entity) + * @version: Version of the query data structure supported by the driver + * @rsvd: Reserved + * @query_data_buf_len: Length of the query data buffer + * @query_data_buf_pa: Physical address of the query data buffer + * @ep: Endpoint identifier to query (when entity is PDS_FWCTL_RPC_ENDPOINT) + * @op: Operation identifier to query (when entity is PDS_FWCTL_RPC_OPERATION) + * + * This structure is used to send a query command to the firmware control + * interface. The structure is packed to ensure there is no padding between + * the fields. + */ +struct pds_fwctl_query_cmd { + u8 opcode; + u8 entity; + u8 version; + u8 rsvd; + __le32 query_data_buf_len; + __le64 query_data_buf_pa; + union { + __le32 ep; + __le32 op; + }; +} __packed; + +/** + * struct pds_fwctl_query_comp - Firmware control query completion structure + * @status: Status of the query command + * @rsvd: Reserved + * @comp_index: Completion index in little-endian format + * @version: Version of the query data structure returned by firmware. 
This + * should be less than or equal to the version supported by the driver + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_query_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 version; + u8 rsvd2[2]; + u8 color; +} __packed; + +/** + * struct pds_fwctl_query_data_endpoint - query data for entity PDS_FWCTL_RPC_ROOT + * @id: The identifier for the data endpoint + */ +struct pds_fwctl_query_data_endpoint { + __le32 id; +} __packed; + +/** + * struct pds_fwctl_query_data_operation - query data for entity PDS_FWCTL_RPC_ENDPOINT + * @id: Operation identifier + * @scope: Scope of the operation (enum fwctl_rpc_scope) + * @rsvd: Reserved + */ +struct pds_fwctl_query_data_operation { + __le32 id; + u8 scope; + u8 rsvd[3]; +} __packed; + +/** + * struct pds_fwctl_query_data - query data structure + * @version: Version of the query data structure + * @rsvd: Reserved + * @num_entries: Number of entries in the union + * @entries: Array of query data entries, depending on the entity type + */ +struct pds_fwctl_query_data { + u8 version; + u8 rsvd[3]; + __le32 num_entries; + u8 entries[] __counted_by_le(num_entries); +} __packed; + +/** + * struct pds_fwctl_rpc_cmd - Firmware control RPC command + * @opcode: opcode PDS_FWCTL_CMD_RPC + * @rsvd: Reserved + * @flags: Indicates indirect request and/or response handling + * @ep: Endpoint identifier + * @op: Operation identifier + * @inline_req0: Buffer for inline request + * @inline_req1: Buffer for inline request + * @req_pa: Physical address of request data + * @req_sz: Size of the request + * @req_sg_elems: Number of request SGs + * @req_rsvd: Reserved + * @inline_req2: Buffer for inline request + * @resp_pa: Physical address of response data + * @resp_sz: Size of the response + * @resp_sg_elems: Number of response SGs + * @resp_rsvd: Reserved + */ +struct pds_fwctl_rpc_cmd { + u8 opcode; + u8 rsvd; + __le16 flags; +#define PDS_FWCTL_RPC_IND_REQ 0x1 +#define PDS_FWCTL_RPC_IND_RESP 0x2 + __le32 ep; + __le32 op; + u8 inline_req0[16]; + union { + u8 inline_req1[16]; + struct { + __le64 req_pa; + __le32 req_sz; + u8 req_sg_elems; + u8 req_rsvd[3]; + }; + }; + union { + u8 inline_req2[16]; + struct { + __le64 resp_pa; + __le32 resp_sz; + u8 resp_sg_elems; + u8 resp_rsvd[3]; + }; + }; +} __packed; + +/** + * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Reserved + */ +struct pds_sg_elem { + __le64 addr; + __le32 len; + u8 rsvd[4]; +} __packed; + +/** + * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC + * @status: Status of the command + * @rsvd: Reserved + * @comp_index: Completion index of the command + * @err: Error code, if any, from the RPC + * @resp_sz: Size of the response + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_rpc_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + __le32 err; + __le32 resp_sz; + u8 rsvd2[3]; + u8 color; +} __packed; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -1216,6 +1484,11 @@ union pds_core_adminq_cmd { struct pds_lm_dirty_enable_cmd lm_dirty_enable; struct pds_lm_dirty_disable_cmd lm_dirty_disable; struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack; + + struct pds_fwctl_cmd fwctl; + struct pds_fwctl_ident_cmd fwctl_ident; + struct pds_fwctl_rpc_cmd fwctl_rpc; + struct pds_fwctl_query_cmd fwctl_query; }; union pds_core_adminq_comp { @@ 
-1243,6 +1516,10 @@ union pds_core_adminq_comp { struct pds_lm_state_size_comp lm_state_size; struct pds_lm_dirty_status_comp lm_dirty_status; + + struct pds_fwctl_comp fwctl; + struct pds_fwctl_rpc_comp fwctl_rpc; + struct pds_fwctl_query_comp fwctl_query; }; #ifndef __CHECKER__ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 5802e1deef24..b193adbe7cc3 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -29,6 +29,7 @@ enum pds_core_vif_types { PDS_DEV_TYPE_ETH = 3, PDS_DEV_TYPE_RDMA = 4, PDS_DEV_TYPE_LM = 5, + PDS_DEV_TYPE_FWCTL = 6, /* new ones added before this line */ PDS_DEV_TYPE_MAX = 16 /* don't change - used in struct size */ @@ -40,6 +41,7 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_ETH_STR "Eth" #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" +#define PDS_DEV_TYPE_FWCTL_STR "fwctl" #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR #define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5b520fe86b60..0aeb0e276a3e 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -26,13 +26,11 @@ #define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" #endif -#define PER_CPU_FIRST_SECTION "..first" #else #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_FIRST_SECTION "" #endif @@ -115,14 +113,17 @@ DEFINE_PER_CPU_SECTION(type, name, "") /* - * Declaration/definition used for per-CPU variables that must come first in - * the set of variables. + * Declaration/definition used for per-CPU variables that are frequently + * accessed and should be in a single cacheline. + * + * For use only by architecture and core code. Only use scalar or pointer + * types to maximize density. */ -#define DECLARE_PER_CPU_FIRST(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) +#define DECLARE_PER_CPU_CACHE_HOT(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name) -#define DEFINE_PER_CPU_FIRST(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) +#define DEFINE_PER_CPU_CACHE_HOT(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name) /* * Declaration/definition used for per-CPU variables that must be cacheline @@ -221,7 +222,7 @@ do { \ } while (0) #define PERCPU_PTR(__p) \ - (typeof(*(__p)) __force __kernel *)((__force unsigned long)(__p)) + (TYPEOF_UNQUAL(*(__p)) __force __kernel *)((__force unsigned long)(__p)) #ifdef CONFIG_SMP @@ -317,7 +318,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_size_call_return(stem, variable) \ ({ \ - typeof(variable) pscr_ret__; \ + TYPEOF_UNQUAL(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable); break; \ @@ -332,7 +333,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_size_call_return2(stem, variable, ...) 
\ ({ \ - typeof(variable) pscr2_ret__; \ + TYPEOF_UNQUAL(variable) pscr2_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index c012df33a9f0..af7d75ede619 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -8,6 +8,7 @@ #include <linux/wait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> +#include <linux/cleanup.h> struct percpu_rw_semaphore { struct rcu_sync rss; @@ -125,6 +126,13 @@ extern bool percpu_is_read_locked(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); +DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *, + percpu_down_read(_T), percpu_up_read(_T)) +DEFINE_GUARD_COND(percpu_read, _try, percpu_down_read_trylock(_T)) + +DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *, + percpu_down_write(_T), percpu_up_write(_T)) + static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem) { return atomic_read(&sem->block); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f..85bf8dd9f087 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4b5b83677e3f..6dc5e0cd76ca 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -84,7 +84,6 @@ struct arm_pmu { struct pmu pmu; cpumask_t supported_cpus; char *name; - int pmuver; irqreturn_t (*handle_irq)(struct arm_pmu *pmu); void (*enable)(struct perf_event *event); void (*disable)(struct perf_event *event); @@ -100,20 +99,26 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + /* + * Called by KVM to map the PMUv3 event space onto non-PMUv3 hardware. 
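+	 *
+	 * An implementation sketch (the returned hardware event number and
+	 * the error convention are illustrative):
+	 *
+	 *	static int foo_pmu_map_pmuv3_event(unsigned int eventsel)
+	 *	{
+	 *		if (eventsel == ARMV8_PMUV3_PERFCTR_CPU_CYCLES)
+	 *			return FOO_PMU_HW_CYCLES;
+	 *		return -EOPNOTSUPP;
+	 *	}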
+ */ + int (*map_pmuv3_event)(unsigned int eventsel); DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS); bool secure_access; /* 32-bit ARM only */ -#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 - DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); -#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 - DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct hlist_node node; struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; - /* store the PMMIR_EL1 to expose slots */ + + /* PMUv3 only */ + int pmuver; u64 reg_pmmir; +#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 + DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); +#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 + DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f7c0a3f2f502..0069ba6866a4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -343,8 +343,7 @@ struct pmu { */ unsigned int scope; - int __percpu *pmu_disable_count; - struct perf_cpu_pmu_context __percpu *cpu_pmu_context; + struct perf_cpu_pmu_context * __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; @@ -495,7 +494,7 @@ struct pmu { * context-switches callback */ void (*sched_task) (struct perf_event_pmu_context *pmu_ctx, - bool sched_in); + struct task_struct *task, bool sched_in); /* * Kmem cache of PMU specific data @@ -503,16 +502,6 @@ struct pmu { struct kmem_cache *task_ctx_cache; /* - * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) - * can be synchronized using this function. See Intel LBR callstack support - * implementation and Perf core context switch handling callbacks for usage - * examples. 
- */ - void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc); - /* optional */ - - /* * Set up pmu-private data structures for an AUX area */ void *(*setup_aux) (struct perf_event *event, void **pages, @@ -673,13 +662,16 @@ struct swevent_hlist { struct rcu_head rcu_head; }; -#define PERF_ATTACH_CONTEXT 0x01 -#define PERF_ATTACH_GROUP 0x02 -#define PERF_ATTACH_TASK 0x04 -#define PERF_ATTACH_TASK_DATA 0x08 -#define PERF_ATTACH_ITRACE 0x10 -#define PERF_ATTACH_SCHED_CB 0x20 -#define PERF_ATTACH_CHILD 0x40 +#define PERF_ATTACH_CONTEXT 0x0001 +#define PERF_ATTACH_GROUP 0x0002 +#define PERF_ATTACH_TASK 0x0004 +#define PERF_ATTACH_TASK_DATA 0x0008 +#define PERF_ATTACH_GLOBAL_DATA 0x0010 +#define PERF_ATTACH_SCHED_CB 0x0020 +#define PERF_ATTACH_CHILD 0x0040 +#define PERF_ATTACH_EXCLUSIVE 0x0080 +#define PERF_ATTACH_CALLCHAIN 0x0100 +#define PERF_ATTACH_ITRACE 0x0200 struct bpf_prog; struct perf_cgroup; @@ -831,7 +823,6 @@ struct perf_event { struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; - struct rcuwait pending_work_wait; atomic_t event_limit; @@ -921,7 +912,7 @@ struct perf_event_pmu_context { struct list_head pinned_active; struct list_head flexible_active; - /* Used to avoid freeing per-cpu perf_event_pmu_context */ + /* Used to identify the per-cpu perf_event_pmu_context */ unsigned int embedded : 1; unsigned int nr_events; @@ -931,7 +922,6 @@ struct perf_event_pmu_context { atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; - void *task_ctx_data; /* pmu specific data */ /* * Set when one or more (plausibly active) event can't be scheduled * due to pmu overcommit or pmu constraints, except tolerant to @@ -979,7 +969,6 @@ struct perf_event_context { int nr_user; int is_active; - int nr_task_data; int nr_stat; int nr_freq; int rotate_disable; @@ -1020,6 +1009,41 @@ struct perf_event_context { local_t nr_no_switch_fast; }; +/** + * struct perf_ctx_data - PMU specific data for a task + * @rcu_head: To avoid the race on free PMU specific data + * @refcount: To track users + * @global: To track system-wide users + * @ctx_cache: Kmem cache of PMU specific data + * @data: PMU specific data + * + * Currently, the struct is only used in Intel LBR call stack mode to + * save/restore the call stack of a task on context switches. + * + * The rcu_head is used to prevent the race on free the data. + * The data only be allocated when Intel LBR call stack mode is enabled. + * The data will be freed when the mode is disabled. + * The content of the data will only be accessed in context switch, which + * should be protected by rcu_read_lock(). + * + * Because of the alignment requirement of Intel Arch LBR, the Kmem cache + * is used to allocate the PMU specific data. The ctx_cache is to track + * the Kmem cache. + * + * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct. + * When system-wide Intel LBR call stack mode is enabled, a buffer with + * constant size will be allocated for each task. + * Also, system memory consumption can further grow when the size of + * struct perf_ctx_data enlarges. 
+ */ +struct perf_ctx_data { + struct rcu_head rcu_head; + refcount_t refcount; + int global; + struct kmem_cache *ctx_cache; + void *data; +}; + struct perf_cpu_pmu_context { struct perf_event_pmu_context epc; struct perf_event_pmu_context *task_epc; @@ -1029,6 +1053,7 @@ struct perf_cpu_pmu_context { int active_oncpu; int exclusive; + int pmu_disable_count; raw_spinlock_t hrtimer_lock; struct hrtimer hrtimer; @@ -1062,7 +1087,13 @@ struct perf_output_handle { struct perf_buffer *rb; unsigned long wakeup; unsigned long size; - u64 aux_flags; + union { + u64 flags; /* perf_output*() */ + u64 aux_flags; /* perf_aux_output*() */ + struct { + u64 skip_read : 1; + }; + }; union { void *addr; unsigned long head; @@ -1279,6 +1310,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, { int size = 1; + if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) + return; + data->callchain = perf_callchain(event, regs); size += data->callchain->nr; @@ -1315,6 +1351,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_RAW; } +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, struct perf_branch_stack *brs, @@ -1322,8 +1363,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, { int size = sizeof(u64); /* nr */ + if (!has_branch_stack(event)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK)) + return; + if (branch_sample_hw_index(event)) size += sizeof(u64); + + brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr); + size += brs->nr * sizeof(struct perf_branch_entry); /* @@ -1631,19 +1680,10 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 } extern int sysctl_perf_event_paranoid; -extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int perf_event_max_stack_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); - /* Access to perf_event_open(2) syscall. 
*/ #define PERF_SECURITY_OPEN 0 @@ -1657,24 +1697,26 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -int perf_allow_kernel(struct perf_event_attr *attr); +int perf_allow_kernel(void); -static inline int perf_allow_cpu(struct perf_event_attr *attr) +static inline int perf_allow_cpu(void) { if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) return -EACCES; - return security_perf_event_open(attr, PERF_SECURITY_CPU); + return security_perf_event_open(PERF_SECURITY_CPU); } -static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +static inline int perf_allow_tracepoint(void) { if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) return -EPERM; - return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); + return security_perf_event_open(PERF_SECURITY_TRACEPOINT); } +extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs); + extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, @@ -1711,11 +1753,6 @@ static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) # define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #endif -static inline bool has_branch_stack(struct perf_event *event) -{ - return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; -} - static inline bool needs_branch_stack(struct perf_event *event) { return event->attr.branch_sample_type != 0; @@ -1885,6 +1922,10 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) +{ + return 0; +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 3469c4b20105..8a7f4f802c57 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -162,72 +162,37 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code } } -static inline void clear_page_tag_ref(struct page *page) -{ - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - set_codetag_empty(&ref); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} +/* Should be called only if mem_alloc_profiling_enabled() */ +void __clear_page_tag_ref(struct page *page); -static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, - unsigned int nr) +static inline void clear_page_tag_ref(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } + if (mem_alloc_profiling_enabled()) + __clear_page_tag_ref(page); } -static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) -{ - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_sub(&ref, PAGE_SIZE * nr); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} - -static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) { struct alloc_tag *tag = NULL; 
- - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_sub_check(&ref); - if (ref.ct) - tag = ct_to_alloc_tag(ref.ct); - put_page_tag_ref(handle); - } + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub_check(&ref); + if (ref.ct) + tag = ct_to_alloc_tag(ref.ct); + put_page_tag_ref(handle); } return tag; } -static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { - if (mem_alloc_profiling_enabled() && tag) - this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; } void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); @@ -238,14 +203,10 @@ void __init alloc_tag_sec_init(void); #else /* CONFIG_MEM_ALLOC_PROFILING */ static inline void clear_page_tag_ref(struct page *page) {} -static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, - unsigned int nr) {} -static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } -static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index adef9d6e9b1b..b50447ef1c92 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -222,10 +222,14 @@ static inline int pmd_dirty(pmd_t pmd) * hazard could result in the direct mode hypervisor case, since the actual * write to the page tables may not yet have taken place, so reads though * a raw PTE pointer after it has been modified are not guaranteed to be - * up to date. This mode can only be entered and left under the protection of - * the page table locks for all page tables which may be modified. In the UP - * case, this is required so that preemption is disabled, and in the SMP case, - * it must synchronize the delayed page table writes properly on other CPUs. + * up to date. + * + * In the general case, no lock is guaranteed to be held between entry and exit + * of the lazy mode. So the implementation must assume preemption may be enabled + * and cpu migration is possible; it must take steps to be robust against this. + * (In practice, for user PTE updates, the appropriate page table lock(s) are + * held, but for kernel PTE updates, no lock is held). Nesting is not permitted + * and the mode cannot be used in interrupt context. 
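The rewritten lazy-MMU comment above now states that no lock is guaranteed across arch_enter/leave_lazy_mmu_mode(), that nesting is forbidden, and that the mode must not be used in interrupt context; together with the set_ptes() hunk just below, batching becomes the caller's responsibility. A hedged sketch of that caller-side pattern (the helper and its arguments are illustrative):

static void example_batch_map(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte, unsigned int nr)
{
	/* one flat section: no nesting, never from interrupt context */
	arch_enter_lazy_mmu_mode();
	set_ptes(mm, addr, ptep, pte, nr);	/* no longer enters lazy mode itself */
	arch_leave_lazy_mmu_mode();
}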
*/ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE #define arch_enter_lazy_mmu_mode() do {} while (0) @@ -287,7 +291,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, { page_table_check_ptes_set(mm, ptep, pte, nr); - arch_enter_lazy_mmu_mode(); for (;;) { set_pte(ptep, pte); if (--nr == 0) @@ -295,7 +298,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, ptep++; pte = pte_next_pfn(pte); } - arch_leave_lazy_mmu_mode(); } #endif #define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) @@ -533,7 +535,14 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep_get_and_clear(mm, addr, ptep); + pte_t pte = ptep_get(ptep); + + pte_clear(mm, addr, ptep); + /* + * No need for ptep_get_and_clear(): page table check doesn't care about + * any bits that could have been set by HW concurrently. + */ + page_table_check_pte_clear(mm, pte); } #ifdef CONFIG_GUP_GET_PXX_LOW_HIGH @@ -1501,15 +1510,29 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, } /* - * track_pfn_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). + * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page + * tables copied during copy_page_range(). Will store the pfn to be + * passed to untrack_pfn_copy() only if there is something to be untracked. + * Callers should initialize the pfn to 0. */ -static inline int track_pfn_copy(struct vm_area_struct *vma) +static inline int track_pfn_copy(struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, unsigned long *pfn) { return 0; } /* + * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during + * copy_page_range(), but after track_pfn_copy() was already called. Can + * be called even if track_pfn_copy() did not actually track anything: + * handled internally. + */ +static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, + unsigned long pfn) +{ +} + +/* * untrack_pfn is called while unmapping a pfnmap for a region. * untrack can be called for a specific region indicated by pfn and size or * can be for the entire vma (in which case pfn, size are zero). @@ -1521,8 +1544,10 @@ static inline void untrack_pfn(struct vm_area_struct *vma, } /* - * untrack_pfn_clear is called while mremapping a pfnmap for a new region - * or fails to copy pgtable during duplicate vm area. + * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA: + * + * 1) During mremap() on the src VMA after the page tables were moved. + * 2) During fork() on the dst VMA, immediately after duplicating the src VMA. 
*/ static inline void untrack_pfn_clear(struct vm_area_struct *vma) { @@ -1533,7 +1558,10 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, unsigned long size); extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn); -extern int track_pfn_copy(struct vm_area_struct *vma); +extern int track_pfn_copy(struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, unsigned long *pfn); +extern void untrack_pfn_copy(struct vm_area_struct *dst_vma, + unsigned long pfn); extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked); extern void untrack_pfn_clear(struct vm_area_struct *vma); diff --git a/include/linux/phy.h b/include/linux/phy.h index bed2b81f8fb3..bef68f6af99a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -32,28 +32,12 @@ #include <linux/atomic.h> #include <net/eee.h> -#define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ - SUPPORTED_TP | \ - SUPPORTED_MII) - -#define PHY_10BT_FEATURES (SUPPORTED_10baseT_Half | \ - SUPPORTED_10baseT_Full) - -#define PHY_100BT_FEATURES (SUPPORTED_100baseT_Half | \ - SUPPORTED_100baseT_Full) - -#define PHY_1000BT_FEATURES (SUPPORTED_1000baseT_Half | \ - SUPPORTED_1000baseT_Full) - extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; -extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; -extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; -extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; @@ -62,21 +46,11 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_T1S_P2MP_FEATURES ((unsigned long *)&phy_basic_t1s_p2mp_features) #define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) -#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) -#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) -#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) #define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; -extern const int phy_fibre_port_array[1]; -extern const int phy_all_ports_features_array[7]; -extern const int phy_10_100_features_array[4]; -extern const int phy_basic_t1_features_array[3]; -extern const int phy_basic_t1s_p2mp_features_array[2]; -extern const int phy_gbit_features_array[2]; -extern const int phy_10gbit_features_array[1]; /* * Set phydev->irq to PHY_POLL if interrupts are not supported, @@ -107,7 +81,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface * @PHY_INTERFACE_MODE_RGMII_ID: RGMII 
with Internal RX+TX delay * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay - * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay + * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal TX delay * @PHY_INTERFACE_MODE_RTBI: Reduced TBI * @PHY_INTERFACE_MODE_SMII: Serial MII * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface @@ -207,13 +181,6 @@ static inline void phy_interface_set_rgmii(unsigned long *intf) __set_bit(PHY_INTERFACE_MODE_RGMII_TXID, intf); } -/* - * phy_supported_speeds - return all speeds currently supported by a PHY device - */ -unsigned int phy_supported_speeds(struct phy_device *phy, - unsigned int *speeds, - unsigned int size); - /** * phy_modes - map phy_interface_t enum to device tree binding of phy-mode * @interface: enum phy_interface_t value @@ -298,13 +265,34 @@ static inline const char *phy_modes(phy_interface_t interface) } } -#define PHY_INIT_TIMEOUT 100000 -#define PHY_FORCE_TIMEOUT 10 +/** + * rgmii_clock - map link speed to the clock rate + * @speed: link speed value + * + * Description: maps RGMII supported link speeds + * into the clock rates. + * + * Returns: clock rate or negative errno + */ +static inline long rgmii_clock(int speed) +{ + switch (speed) { + case SPEED_10: + return 2500000; + case SPEED_100: + return 25000000; + case SPEED_1000: + return 125000000; + default: + return -EINVAL; + } +} #define PHY_MAX_ADDR 32 /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ #define PHY_ID_FMT "%s:%02x" +#define PHY_ID_SIZE (MII_BUS_ID_SIZE + 3) #define MII_BUS_ID_SIZE 61 @@ -333,41 +321,6 @@ struct mdio_bus_stats { }; /** - * struct phy_package_shared - Shared information in PHY packages - * @base_addr: Base PHY address of PHY package used to combine PHYs - * in one package and for offset calculation of phy_package_read/write - * @np: Pointer to the Device Node if PHY package defined in DT - * @refcnt: Number of PHYs connected to this shared data - * @flags: Initialization of PHY package - * @priv_size: Size of the shared private data @priv - * @priv: Driver private data shared across a PHY package - * - * Represents a shared structure between different phydev's in the same - * package, for example a quad PHY. See phy_package_join() and - * phy_package_leave(). - */ -struct phy_package_shared { - u8 base_addr; - /* With PHY package defined in DT this points to the PHY package node */ - struct device_node *np; - refcount_t refcnt; - unsigned long flags; - size_t priv_size; - - /* private data pointer */ - /* note that this pointer is shared between different phydevs and - * the user has to take care of appropriate locking. It is allocated - * and freed automatically by phy_package_join() and - * phy_package_leave(). 
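rgmii_clock() above maps an RGMII link speed onto the required clock rate. A short, hedged sketch of a MAC driver's link-up path using it together with the common clock framework; the clock handle is illustrative:

#include <linux/clk.h>

static int example_mac_set_tx_clock(struct clk *tx_clk, int speed)
{
	long rate = rgmii_clock(speed);	/* 2.5 / 25 / 125 MHz, or -EINVAL */

	if (rate < 0)
		return rate;

	return clk_set_rate(tx_clk, rate);
}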
- */ - void *priv; -}; - -/* used as bit number in atomic bitops */ -#define PHY_SHARED_F_INIT_DONE 0 -#define PHY_SHARED_F_PROBE_DONE 1 - -/** * struct mii_bus - Represents an MDIO bus * * @owner: Who owns this device @@ -606,7 +559,7 @@ struct macsec_ops; * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host - * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited + * @eee_disabled_modes: Energy efficient ethernet modes not to be advertised * @autoneg: Flag autoneg being used * @rate_matching: Current rate matching mode * @link: Current link state @@ -722,7 +675,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); /* Energy efficient ethernet modes which should be prohibited */ - __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); + __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes); bool enable_tx_lpi; bool eee_active; struct eee_config eee_cfg; @@ -791,10 +744,7 @@ struct phy_device { #define PHY_F_NO_IRQ 0x80000000 #define PHY_F_RXC_ALWAYS_ON 0x40000000 -static inline struct phy_device *to_phy_device(const struct device *dev) -{ - return container_of(to_mdio_device(dev), struct phy_device, mdio); -} +#define to_phy_device(__dev) container_of_const(to_mdio_device(__dev), struct phy_device, mdio) /** * struct phy_tdr_config - Configuration of a TDR raw test @@ -818,6 +768,24 @@ struct phy_tdr_config { #define PHY_PAIR_ALL -1 /** + * enum link_inband_signalling - in-band signalling modes that are supported + * + * @LINK_INBAND_DISABLE: in-band signalling can be disabled + * @LINK_INBAND_ENABLE: in-band signalling can be enabled without bypass + * @LINK_INBAND_BYPASS: in-band signalling can be enabled with bypass + * + * The possible and required bits can only be used if the valid bit is set. + * If possible is clear, that means inband signalling can not be used. + * Required is only valid when possible is set, and means that inband + * signalling must be used. + */ +enum link_inband_signalling { + LINK_INBAND_DISABLE = BIT(0), + LINK_INBAND_ENABLE = BIT(1), + LINK_INBAND_BYPASS = BIT(2), +}; + +/** * struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision * Avoidance) Reconciliation Sublayer. * @@ -957,6 +925,19 @@ struct phy_driver { int (*get_features)(struct phy_device *phydev); /** + * @inband_caps: query whether in-band is supported for the given PHY + * interface mode. Returns a bitmask of bits defined by enum + * link_inband_signalling. + */ + unsigned int (*inband_caps)(struct phy_device *phydev, + phy_interface_t interface); + + /** + * @config_inband: configure in-band mode for the PHY + */ + int (*config_inband)(struct phy_device *phydev, unsigned int modes); + + /** * @get_rate_matching: Get the supported type of rate matching for a * particular phy interface. This is used by phy consumers to determine * whether to advertise lower-speed modes for that interface. It is @@ -1119,6 +1100,24 @@ struct phy_driver { */ void (*get_link_stats)(struct phy_device *dev, struct ethtool_link_ext_stats *link_stats); + + /** + * @update_stats: Trigger periodic statistics updates. + * @dev: The PHY device for which statistics updates are triggered. + * + * Periodically gathers statistics from the PHY device to update locally + * maintained 64-bit counters. 
This is necessary for PHYs that implement + * reduced-width counters (e.g., 16-bit or 32-bit) which can overflow + * more frequently compared to 64-bit counters. By invoking this + * callback, drivers can fetch the current counter values, handle + * overflow detection, and accumulate the results into local 64-bit + * counters for accurate reporting through the `get_phy_stats` and + * `get_link_stats` interfaces. + * + * Return: 0 on success or a negative error code on failure. + */ + int (*update_stats)(struct phy_device *dev); + /** @get_sset_count: Number of statistic counters */ int (*get_sset_count)(struct phy_device *dev); /** @get_strings: Names of the statistic counters */ @@ -1135,8 +1134,16 @@ struct phy_driver { int (*set_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, const void *data); - /** @set_loopback: Set the loopback mood of the PHY */ - int (*set_loopback)(struct phy_device *dev, bool enable); + /** + * @set_loopback: Set the loopback mode of the PHY + * enable selects if the loopback mode is enabled or disabled. If the + * loopback mode is enabled, then the speed of the loopback mode can be + * requested with the speed argument. If the speed argument is zero, + * then any speed can be selected. If the speed argument is > 0, then + * this speed shall be selected for the loopback mode or EOPNOTSUPP + * shall be returned if speed selection is not supported. + */ + int (*set_loopback)(struct phy_device *dev, bool enable, int speed); /** @get_sqi: Get the signal quality indication */ int (*get_sqi)(struct phy_device *dev); /** @get_sqi_max: Get the maximum signal quality indication */ @@ -1219,13 +1226,23 @@ struct phy_driver { */ int (*led_polarity_set)(struct phy_device *dev, int index, unsigned long modes); + + /** + * @get_next_update_time: Get the time until the next update event + * @dev: PHY device + * + * Callback to determine the time (in jiffies) until the next + * update event for the PHY state machine. Allows PHY drivers to + * dynamically adjust polling intervals based on link state or other + * conditions. + * + * Returns the time in jiffies until the next update event. 
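The update_stats and get_next_update_time callbacks documented above let a PHY driver fold narrow hardware counters into 64-bit software counters and tune its polling interval. A hedged sketch of a driver providing both; the register number and intervals are invented for illustration:

static int example_phy_update_stats(struct phy_device *phydev)
{
	int val = phy_read(phydev, 0x18);	/* hypothetical 16-bit error counter */

	if (val < 0)
		return val;

	/* accumulate into a driver-private 64-bit counter (not shown) */
	return 0;
}

static unsigned int example_phy_get_next_update_time(struct phy_device *phydev)
{
	/* poll more often while the link is up so narrow counters cannot wrap */
	return phydev->link ? msecs_to_jiffies(500) : HZ;
}

Note that, per the phy_polling_mode() hunk further down, merely providing update_stats switches the PHY to polling.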
+ */ + unsigned int (*get_next_update_time)(struct phy_device *dev); }; #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -#define PHY_ANY_ID "MATCH ANY PHY" -#define PHY_ANY_UID 0xffffffff - #define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) #define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) #define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) @@ -1258,62 +1275,36 @@ static inline bool phydev_id_compare(struct phy_device *phydev, u32 id) return phy_id_compare(id, phydev->phy_id, phydev->drv->phy_id_mask); } -/* A Structure for boards to register fixups with the PHY Lib */ -struct phy_fixup { - struct list_head list; - char bus_id[MII_BUS_ID_SIZE + 3]; - u32 phy_uid; - u32 phy_uid_mask; - int (*run)(struct phy_device *phydev); -}; - const char *phy_speed_to_str(int speed); const char *phy_duplex_to_str(unsigned int duplex); const char *phy_rate_matching_to_str(int rate_matching); int phy_interface_num_ports(phy_interface_t interface); -/* A structure for mapping a particular speed and duplex - * combination to a particular SUPPORTED and ADVERTISED value - */ -struct phy_setting { - u32 speed; - u8 duplex; - u8 bit; -}; - -const struct phy_setting * -phy_lookup_setting(int speed, int duplex, const unsigned long *mask, - bool exact); -size_t phy_speeds(unsigned int *speeds, size_t size, - unsigned long *mask); -void of_set_phy_supported(struct phy_device *phydev); -void of_set_phy_eee_broken(struct phy_device *phydev); -void of_set_phy_timing_role(struct phy_device *phydev); -int phy_speed_down_core(struct phy_device *phydev); - /** - * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised. + * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct - * @link_mode: The broken EEE mode */ -static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode) +static inline bool phy_is_started(struct phy_device *phydev) { - linkmode_set_bit(link_mode, phydev->eee_broken_modes); + return phydev->state >= PHY_UP; } /** - * phy_is_started - Convenience function to check whether PHY is started + * phy_disable_eee_mode - Don't advertise an EEE mode. 
* @phydev: The phy_device struct + * @link_mode: The EEE mode to be disabled */ -static inline bool phy_is_started(struct phy_device *phydev) +static inline void phy_disable_eee_mode(struct phy_device *phydev, u32 link_mode) { - return phydev->state >= PHY_UP; + WARN_ON(phy_is_started(phydev)); + + linkmode_set_bit(link_mode, phydev->eee_disabled_modes); + linkmode_clear_bit(link_mode, phydev->advertising_eee); } void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); -void phy_check_downshift(struct phy_device *phydev); /** * phy_read - Convenience function for reading a given PHY register @@ -1609,6 +1600,9 @@ static inline bool phy_polling_mode(struct phy_device *phydev) if (phydev->drv->flags & PHY_POLL_CABLE_TEST) return true; + if (phydev->drv->update_stats) + return true; + return phydev->irq == PHY_POLL; } @@ -1690,15 +1684,6 @@ static inline bool phy_is_default_hwtstamp(struct phy_device *phydev) } /** - * phy_is_internal - Convenience function for testing if a PHY is internal - * @phydev: the phy_device struct - */ -static inline bool phy_is_internal(struct phy_device *phydev) -{ - return phydev->is_internal; -} - -/** * phy_on_sfp - Convenience function for testing if a PHY is on an SFP module * @phydev: the phy_device struct */ @@ -1821,7 +1806,7 @@ int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); -int phy_loopback(struct phy_device *phydev, bool enable); +int phy_loopback(struct phy_device *phydev, bool enable, int speed); int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); @@ -1847,6 +1832,9 @@ int phy_config_aneg(struct phy_device *phydev); int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); +unsigned int phy_inband_caps(struct phy_device *phydev, + phy_interface_t interface); +int phy_config_inband(struct phy_device *phydev, unsigned int modes); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); bool phy_check_valid(int speed, int duplex, unsigned long *features); @@ -1933,7 +1921,7 @@ int genphy_read_status(struct phy_device *phydev); int genphy_read_master_slave(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); -int genphy_loopback(struct phy_device *phydev, bool enable); +int genphy_loopback(struct phy_device *phydev, bool enable, int speed); int genphy_soft_reset(struct phy_device *phydev); irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev); @@ -1975,7 +1963,7 @@ int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_baset1_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); -int genphy_c45_loopback(struct phy_device *phydev, bool enable); +int genphy_c45_loopback(struct phy_device *phydev, bool enable, int speed); int genphy_c45_pma_resume(struct phy_device *phydev); int genphy_c45_pma_suspend(struct phy_device *phydev); int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable); @@ -1985,14 +1973,12 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev, const struct phy_plca_cfg 
*plca_cfg); int genphy_c45_plca_get_status(struct phy_device *phydev, struct phy_plca_status *plca_st); -int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp, bool *is_enabled); +int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); -int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; @@ -2018,7 +2004,6 @@ int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_error(struct phy_device *phydev); void phy_state_machine(struct work_struct *work); -void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); void phy_trigger_machine(struct phy_device *phydev); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); @@ -2043,6 +2028,7 @@ void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); void phy_support_eee(struct phy_device *phydev); +void phy_disable_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); @@ -2053,11 +2039,13 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, const int *delay_values, int size, bool is_rx); +int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, + enum ethtool_link_mode_bit_indices linkmode, + u32 *val); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); -int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); int phy_register_fixup_for_id(const char *bus_id, int (*run)(struct phy_device *)); int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, @@ -2067,6 +2055,8 @@ int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask); int phy_unregister_fixup_for_id(const char *bus_id); int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); +int phy_eee_tx_clock_stop_capable(struct phy_device *phydev); +int phy_eee_rx_clock_stop(struct phy_device *phydev, bool clk_stop_enable); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); @@ -2079,13 +2069,6 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); -int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size); -int of_phy_package_join(struct phy_device *phydev, size_t priv_size); -void phy_package_leave(struct phy_device *phydev); -int devm_phy_package_join(struct device *dev, struct phy_device *phydev, - int base_addr, size_t priv_size); -int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, - size_t priv_size); int __init mdio_bus_init(void); void mdio_bus_exit(void); 
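Several prototypes above gain a speed argument for loopback (phy_loopback(), genphy_loopback(), genphy_c45_loopback()), and EEE modes are now suppressed via phy_disable_eee_mode(). A hedged sketch of a driver using the reworked calls; the selftest/probe context is illustrative:

static int example_phy_selftest(struct phy_device *phydev)
{
	int ret;

	/* request loopback at a fixed speed; 0 means "any speed" */
	ret = phy_loopback(phydev, true, SPEED_1000);
	if (ret)
		return ret;

	/* ... send and verify test frames ... */

	return phy_loopback(phydev, false, 0);
}

static int example_phy_probe_config(struct phy_device *phydev)
{
	/* must run before the PHY is started; the helper WARNs otherwise */
	phy_disable_eee_mode(phydev, ETHTOOL_LINK_MODE_2500baseT_Full_BIT);
	return 0;
}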
@@ -2115,104 +2098,6 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); -static inline int phy_package_address(struct phy_device *phydev, - unsigned int addr_offset) -{ - struct phy_package_shared *shared = phydev->shared; - u8 base_addr = shared->base_addr; - - if (addr_offset >= PHY_MAX_ADDR - base_addr) - return -EIO; - - /* we know that addr will be in the range 0..31 and thus the - * implicit cast to a signed int is not a problem. - */ - return base_addr + addr_offset; -} - -static inline int phy_package_read(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return mdiobus_read(phydev->mdio.bus, addr, regnum); -} - -static inline int __phy_package_read(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return __mdiobus_read(phydev->mdio.bus, addr, regnum); -} - -static inline int phy_package_write(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum, - u16 val) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return mdiobus_write(phydev->mdio.bus, addr, regnum, val); -} - -static inline int __phy_package_write(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum, - u16 val) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return __mdiobus_write(phydev->mdio.bus, addr, regnum, val); -} - -int __phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum); - -int phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum); - -int __phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val); - -int phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val); - -static inline bool __phy_package_set_once(struct phy_device *phydev, - unsigned int b) -{ - struct phy_package_shared *shared = phydev->shared; - - if (!shared) - return false; - - return !test_and_set_bit(b, &shared->flags); -} - -static inline bool phy_package_init_once(struct phy_device *phydev) -{ - return __phy_package_set_once(phydev, PHY_SHARED_F_INIT_DONE); -} - -static inline bool phy_package_probe_once(struct phy_device *phydev) -{ - return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); -} - extern const struct bus_type mdio_bus_type; struct mdio_board_info { diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 03cd5bae92d3..e63e6e70e860 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -227,8 +227,6 @@ int phy_pm_runtime_get(struct phy *phy); int phy_pm_runtime_get_sync(struct phy *phy); int phy_pm_runtime_put(struct phy *phy); int phy_pm_runtime_put_sync(struct phy *phy); -void phy_pm_runtime_allow(struct phy *phy); -void phy_pm_runtime_forbid(struct phy *phy); int phy_init(struct phy *phy); int phy_exit(struct phy *phy); int phy_power_on(struct phy *phy); @@ -321,16 +319,6 @@ static inline int phy_pm_runtime_put_sync(struct phy *phy) return -ENOSYS; } -static inline void phy_pm_runtime_allow(struct phy *phy) -{ - return; -} - -static inline void phy_pm_runtime_forbid(struct phy *phy) -{ - return; -} - static inline int phy_init(struct phy *phy) { if (!phy) diff --git 
a/include/linux/phylink.h b/include/linux/phylink.h index 5c01048860c4..30659b615fca 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -5,6 +5,8 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <net/eee.h> + struct device_node; struct ethtool_cmd; struct fwnode_handle; @@ -143,11 +145,17 @@ enum phylink_op_type { * possible and avoid stopping it during suspend events. * @default_an_inband: if true, defaults to MLO_AN_INBAND rather than * MLO_AN_PHY. A fixed-link specification will override. + * @eee_rx_clk_stop_enable: if true, PHY can stop the receive clock during LPI * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx * are supported by the MAC/PCS. + * @lpi_interfaces: bitmap describing which PHY interface modes can support + * LPI signalling. * @mac_capabilities: MAC pause/speed/duplex capabilities. + * @lpi_capabilities: MAC speeds which can support LPI signalling + * @lpi_timer_default: Default EEE LPI timer setting. + * @eee_enabled_default: If set, EEE will be enabled by phylink at creation time */ struct phylink_config { struct device *dev; @@ -156,10 +164,15 @@ struct phylink_config { bool mac_managed_pm; bool mac_requires_rxc; bool default_an_inband; + bool eee_rx_clk_stop_enable; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); DECLARE_PHY_INTERFACE_MASK(supported_interfaces); + DECLARE_PHY_INTERFACE_MASK(lpi_interfaces); unsigned long mac_capabilities; + unsigned long lpi_capabilities; + u32 lpi_timer_default; + bool eee_enabled_default; }; void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); @@ -173,6 +186,8 @@ void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); * @mac_finish: finish a major reconfiguration of the interface. * @mac_link_down: take the link down. * @mac_link_up: allow the link to come up. + * @mac_disable_tx_lpi: disable LPI. + * @mac_enable_tx_lpi: enable and configure LPI. * * The individual methods are described more fully below. */ @@ -193,6 +208,9 @@ struct phylink_mac_ops { struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, int duplex, bool tx_pause, bool rx_pause); + void (*mac_disable_tx_lpi)(struct phylink_config *config); + int (*mac_enable_tx_lpi)(struct phylink_config *config, u32 timer, + bool tx_clk_stop); }; #if 0 /* For kernel-doc purposes only. */ @@ -343,23 +361,29 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_link_down() - take the link down + * mac_link_down() - notification that the link has gone down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. Interface type - * selection must be done in mac_config(). + * Notifies the MAC that the link has gone down. This will not be called + * unless mac_link_up() has been previously called. + * + * The MAC should stop processing packets for transmission and reception. + * phylink will have called netif_carrier_off() to notify the networking + * stack that the link has gone down, so MAC drivers should not make this + * call. 
+ * + * If @mode is %MLO_AN_INBAND, then this function must not prevent the + * link coming up. */ void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** - * mac_link_up() - allow the link to come up + * mac_link_up() - notification that the link has come up * @config: a pointer to a &struct phylink_config. - * @phy: any attached phy + * @phy: any attached phy (deprecated - please use LPI interfaces) * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -367,7 +391,10 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * @tx_pause: link transmit pause enablement status * @rx_pause: link receive pause enablement status * - * Configure the MAC for an established link. + * Notifies the MAC that the link has come up, and the parameters of the + * link as seen from the MACs point of view. If mac_link_up() has been + * called previously, there will be an intervening call to mac_link_down() + * before this method will be subsequently called. * * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link * settings, and should be used to configure the MAC block appropriately @@ -379,23 +406,51 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * that the user wishes to override the pause settings, and this should * be allowed when considering the implementation of this method. * - * If in-band negotiation mode is disabled, allow the link to come up. If - * @phy is non-%NULL, configure Energy Efficient Ethernet by calling - * phy_init_eee() and perform appropriate MAC configuration for EEE. + * Once configured, the MAC may begin to process packets for transmission + * and reception. + * * Interface type selection must be done in mac_config(). */ void mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, int duplex, bool tx_pause, bool rx_pause); + +/** + * mac_disable_tx_lpi() - disable LPI generation at the MAC + * @config: a pointer to a &struct phylink_config. + * + * Disable generation of LPI at the MAC, effectively preventing the MAC + * from indicating that it is idle. + */ +void mac_disable_tx_lpi(struct phylink_config *config); + +/** + * mac_enable_tx_lpi() - configure and enable LPI generation at the MAC + * @config: a pointer to a &struct phylink_config. + * @timer: LPI timeout in microseconds. + * @tx_clk_stop: allow xMII transmit clock to be stopped during LPI + * + * Configure the LPI timeout accordingly. This will only be called when + * the link is already up, to cater for situations where the hardware + * needs to be programmed according to the link speed. + * + * Enable LPI generation at the MAC, and configure whether the xMII transmit + * clock may be stopped. + * + * Returns: 0 on success. Please consult with rmk before returning an error. + */ +int mac_enable_tx_lpi(struct phylink_config *config, u32 timer, + bool tx_clk_stop); #endif struct phylink_pcs_ops; /** * struct phylink_pcs - PHYLINK PCS instance + * @supported_interfaces: describing which PHY_INTERFACE_MODE_xxx + * are supported by this PCS. * @ops: a pointer to the &struct phylink_pcs_ops structure * @phylink: pointer to &struct phylink_config - * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes * @rxc_always_on: The MAC driver requires the reference clock * to always be on. 
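mac_enable_tx_lpi() and mac_disable_tx_lpi() above are the new phylink hooks for EEE LPI generation; a MAC advertises what it can do through the lpi_capabilities, lpi_interfaces and lpi_timer_default fields added to struct phylink_config earlier in this file. A hedged sketch of a MAC driver's side of this, with register accesses reduced to comments and all names illustrative:

static void example_mac_disable_tx_lpi(struct phylink_config *config)
{
	/* clear the MAC's LPI-enable bit so it stops signalling idle */
}

static int example_mac_enable_tx_lpi(struct phylink_config *config, u32 timer,
				     bool tx_clk_stop)
{
	/*
	 * Program the LPI entry timer (microseconds), optionally allow the
	 * xMII transmit clock to stop during LPI, then set LPI-enable.
	 */
	return 0;
}

static const struct phylink_mac_ops example_mac_ops = {
	/* ... mac_config(), mac_link_up(), mac_link_down(), ... */
	.mac_disable_tx_lpi	= example_mac_disable_tx_lpi,
	.mac_enable_tx_lpi	= example_mac_enable_tx_lpi,
};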
Standalone PCS drivers which @@ -409,9 +464,9 @@ struct phylink_pcs_ops; * the PCS driver. */ struct phylink_pcs { + DECLARE_PHY_INTERFACE_MASK(supported_interfaces); const struct phylink_pcs_ops *ops; struct phylink *phylink; - bool neg_mode; bool poll; bool rxc_always_on; }; @@ -419,6 +474,7 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_validate: validate the link configuration. + * @pcs_inband_caps: query inband support for interface mode. * @pcs_enable: enable the PCS. * @pcs_disable: disable the PCS. * @pcs_pre_config: pre-mac_config method (for errata) @@ -428,19 +484,25 @@ struct phylink_pcs { * @pcs_an_restart: restart 802.3z BaseX autonegotiation. * @pcs_link_up: program the PCS for the resolved link configuration * (where necessary). + * @pcs_disable_eee: optional notification to PCS that EEE has been disabled + * at the MAC. + * @pcs_enable_eee: optional notification to PCS that EEE will be enabled at + * the MAC. * @pcs_pre_init: configure PCS components necessary for MAC hardware * initialization e.g. RX clock for stmmac. */ struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); + unsigned int (*pcs_inband_caps)(struct phylink_pcs *pcs, + phy_interface_t interface); int (*pcs_enable)(struct phylink_pcs *pcs); void (*pcs_disable)(struct phylink_pcs *pcs); void (*pcs_pre_config)(struct phylink_pcs *pcs, phy_interface_t interface); int (*pcs_post_config)(struct phylink_pcs *pcs, phy_interface_t interface); - void (*pcs_get_state)(struct phylink_pcs *pcs, + void (*pcs_get_state)(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, @@ -449,6 +511,8 @@ struct phylink_pcs_ops { void (*pcs_an_restart)(struct phylink_pcs *pcs); void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + void (*pcs_disable_eee)(struct phylink_pcs *pcs); + void (*pcs_enable_eee)(struct phylink_pcs *pcs); int (*pcs_pre_init)(struct phylink_pcs *pcs); }; @@ -471,6 +535,20 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); /** + * pcs_inband_caps - query PCS in-band capabilities for interface mode. + * @pcs: a pointer to a &struct phylink_pcs. + * @interface: interface mode to be queried + * + * Returns zero if it is unknown what in-band signalling is supported by the + * PHY (e.g. because the PHY driver doesn't implement the method.) Otherwise, + * returns a bit mask of the LINK_INBAND_* values from + * &enum link_inband_signalling to describe which inband modes are supported + * for this interface mode. + */ +unsigned int pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface); + +/** * pcs_enable() - enable the PCS. * @pcs: a pointer to a &struct phylink_pcs. */ @@ -485,6 +563,7 @@ void pcs_disable(struct phylink_pcs *pcs); /** * pcs_get_state() - Read the current inband link state from the hardware * @pcs: a pointer to a &struct phylink_pcs. + * @neg_mode: link negotiation mode (PHYLINK_PCS_NEG_xxx) * @state: a pointer to a &struct phylink_link_state. * * Read the current inband link state from the MAC PCS, reporting the @@ -493,8 +572,11 @@ void pcs_disable(struct phylink_pcs *pcs); * negotiation completion state in @state->an_complete, and link up state * in @state->link. 
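pcs_inband_caps() above reports, per interface mode, which of the LINK_INBAND_* capabilities (from the enum link_inband_signalling added to phy.h earlier in this patch) the PCS supports. A hedged sketch of a PCS driver's implementation; the interface choices are illustrative:

static unsigned int example_pcs_inband_caps(struct phylink_pcs *pcs,
					    phy_interface_t interface)
{
	switch (interface) {
	case PHY_INTERFACE_MODE_SGMII:
		/* in-band signalling may be enabled or disabled as needed */
		return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE;
	case PHY_INTERFACE_MODE_1000BASEX:
		/* 802.3z requires in-band signalling */
		return LINK_INBAND_ENABLE;
	default:
		return 0;	/* unknown: phylink falls back to its defaults */
	}
}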
If possible, @state->lp_advertising should also be * populated. + * + * Note that the @neg_mode parameter is always the PHYLINK_PCS_NEG_xxx + * state, not MLO_AN_xxx. */ -void pcs_get_state(struct phylink_pcs *pcs, +void pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state); /** @@ -522,6 +604,14 @@ void pcs_get_state(struct phylink_pcs *pcs, * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested * against the PHYLINK_PCS_NEG_* definitions. + * + * pcs_config() will be called when configuration of the PCS is required + * or when the advertisement is possibly updated. It must not unnecessarily + * disrupt an established link. + * + * When an autonegotiation restart is required for 802.3z modes, .pcs_config() + * should return a positive non-zero integer (e.g. 1) to indicate to phylink + * to call the pcs_an_restart() method. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -557,6 +647,22 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); /** + * pcs_disable_eee() - Disable EEE at the PCS + * @pcs: a pointer to a &struct phylink_pcs + * + * Optional method informing the PCS that EEE has been disabled at the MAC. + */ +void pcs_disable_eee(struct phylink_pcs *pcs); + +/** + * pcs_enable_eee() - Enable EEE at the PCS + * @pcs: a pointer to a &struct phylink_pcs + * + * Optional method informing the PCS that EEE is about to be enabled at the MAC. + */ +void pcs_enable_eee(struct phylink_pcs *pcs); + +/** * pcs_pre_init() - Configure PCS components necessary for MAC initialization * @pcs: a pointer to a &struct phylink_pcs. * @@ -609,7 +715,11 @@ int phylink_pcs_pre_init(struct phylink *pl, struct phylink_pcs *pcs); void phylink_start(struct phylink *); void phylink_stop(struct phylink *); +void phylink_rx_clk_stop_block(struct phylink *); +void phylink_rx_clk_stop_unblock(struct phylink *); + void phylink_suspend(struct phylink *pl, bool mac_wol); +void phylink_prepare_resume(struct phylink *pl); void phylink_resume(struct phylink *pl); void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *); @@ -625,7 +735,6 @@ void phylink_ethtool_get_pauseparam(struct phylink *, int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); -int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); @@ -668,9 +777,22 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) } } +/** + * phylink_mac_implements_lpi() - determine if MAC implements LPI ops + * @ops: phylink_mac_ops structure + * + * Returns true if the phylink MAC operations structure indicates that the + * LPI operations have been implemented, false otherwise. 
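The pcs_config() documentation above adds a convention: returning a positive value asks phylink to call pcs_an_restart() for 802.3z modes. A hedged sketch of that return path, with the actual advertisement programming reduced to a comment; keying the decision off PHYLINK_PCS_NEG_INBAND_ENABLED is shown only as an assumption, not mandated by the patch:

static int example_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
			      phy_interface_t interface,
			      const unsigned long *advertising,
			      bool permit_pause_to_mac)
{
	bool changed = false;

	/* write the new 802.3z/SGMII advertisement; set 'changed' if it differed */

	if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED && changed)
		return 1;	/* ask phylink to restart autoneg via pcs_an_restart() */

	return 0;
}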
+ */ +static inline bool phylink_mac_implements_lpi(const struct phylink_mac_ops *ops) +{ + return ops && ops->mac_disable_tx_lpi && ops->mac_enable_tx_lpi; +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, - u16 bmsr, u16 lpa); + unsigned int neg_mode, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + unsigned int neg_mode, struct phylink_link_state *state); int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface, const unsigned long *advertising); diff --git a/include/linux/pid.h b/include/linux/pid.h index a3aad9b4074c..311ecebd7d56 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -59,6 +59,7 @@ struct pid spinlock_t lock; struct dentry *stashed; u64 ino; + struct rb_node pidfs_node; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -68,6 +69,7 @@ struct pid struct upid numbers[]; }; +extern seqcount_spinlock_t pidmap_lock_seq; extern struct pid init_struct_pid; struct file; @@ -99,16 +101,13 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); * these helpers must be called with the tasklist_lock write-held. */ extern void attach_pid(struct task_struct *task, enum pid_type); -extern void detach_pid(struct task_struct *task, enum pid_type); -extern void change_pid(struct task_struct *task, enum pid_type, - struct pid *pid); +void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type); +void change_pid(struct pid **pids, struct task_struct *task, enum pid_type, + struct pid *pid); extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); -extern int pid_max; -extern int pid_max_min, pid_max_max; - /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. 
@@ -130,6 +129,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *); extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size); extern void free_pid(struct pid *pid); +void free_pids(struct pid **pids); extern void disable_pid_allocation(struct pid_namespace *ns); /* diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index f9f9931e02d6..7c67a5811199 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,7 @@ struct pid_namespace { struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; + int pid_max; struct pid_namespace *parent; #ifdef CONFIG_BSD_PROCESS_ACCT struct fs_pin *bacct; @@ -38,9 +39,14 @@ struct pid_namespace { struct ucounts *ucounts; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) + struct work_struct work; +#ifdef CONFIG_SYSCTL + struct ctl_table_set set; + struct ctl_table_header *sysctls; +#if defined(CONFIG_MEMFD_CREATE) int memfd_noexec_scope; #endif +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; @@ -117,6 +123,8 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +int register_pidns_sysctls(struct pid_namespace *pidns); +void unregister_pidns_sysctls(struct pid_namespace *pidns); static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) { diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 75bdf9807802..05e6f8f4a026 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -4,5 +4,9 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); +void pidfs_add_pid(struct pid *pid); +void pidfs_remove_pid(struct pid *pid); +void pidfs_exit(struct task_struct *tsk); +extern const struct dentry_operations pidfs_dentry_operations; #endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 53cfde98433d..1bcf071b860e 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -232,4 +232,8 @@ static inline int pinconf_generic_dt_node_to_map_all(struct pinctrl_dev *pctldev PIN_MAP_TYPE_INVALID); } +int pinconf_generic_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev, + struct device_node *np, + struct pinctrl_map **map, + unsigned int *num_maps); #endif /* __LINUX_PINCTRL_PINCONF_GENERIC_H */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8ff23bf5a819..9d42d473d201 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -31,6 +31,33 @@ struct pipe_buffer { unsigned long private; }; +/* + * Really only alpha needs 32-bit fields, but + * might as well do it for 64-bit architectures + * since that's what we've historically done, + * and it makes 'head_tail' always be a simple + * 'unsigned long'. + */ +#ifdef CONFIG_64BIT +typedef unsigned int pipe_index_t; +#else +typedef unsigned short pipe_index_t; +#endif + +/* + * We have to declare this outside 'struct pipe_inode_info', + * but then we can't use 'union pipe_index' for an anonymous + * union, so we end up having to duplicate this declaration + * below. Annoying. 
+ */ +union pipe_index { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; +}; + /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing @@ -38,6 +65,7 @@ struct pipe_buffer { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @head_tail: unsigned long union of @head and @tail * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) @@ -58,8 +86,16 @@ struct pipe_buffer { struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; - unsigned int head; - unsigned int tail; + + /* This has to match the 'union pipe_index' above */ + union { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; + }; + unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; @@ -72,7 +108,7 @@ struct pipe_inode_info { #ifdef CONFIG_WATCH_QUEUE bool note_loss; #endif - struct page *tmp_page; + struct page *tmp_page[2]; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; @@ -141,23 +177,23 @@ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) } /** - * pipe_empty - Return true if the pipe is empty + * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline bool pipe_empty(unsigned int head, unsigned int tail) +static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head == tail; + return (pipe_index_t)(head - tail); } /** - * pipe_occupancy - Return number of slots used in the pipe + * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) +static inline bool pipe_empty(unsigned int head, unsigned int tail) { - return head - tail; + return !pipe_occupancy(head, tail); } /** @@ -173,6 +209,33 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, } /** + * pipe_is_full - Return true if the pipe is full + * @pipe: the pipe + */ +static inline bool pipe_is_full(const struct pipe_inode_info *pipe) +{ + return pipe_full(pipe->head, pipe->tail, pipe->max_usage); +} + +/** + * pipe_is_empty - Return true if the pipe is empty + * @pipe: the pipe + */ +static inline bool pipe_is_empty(const struct pipe_inode_info *pipe) +{ + return pipe_empty(pipe->head, pipe->tail); +} + +/** + * pipe_buf_usage - Return how many pipe buffers are in use + * @pipe: the pipe + */ +static inline unsigned int pipe_buf_usage(const struct pipe_inode_info *pipe) +{ + return pipe_occupancy(pipe->head, pipe->tail); +} + +/** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access * @slot: The slot of interest @@ -245,15 +308,6 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } -static inline void pipe_discard_from(struct pipe_inode_info *pipe, - unsigned int old_head) -{ - unsigned int mask = pipe->ring_size - 1; - - while (pipe->head > old_head) - pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); -} - /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes 
atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE diff --git a/include/linux/platform_data/clk-davinci-pll.h b/include/linux/platform_data/clk-davinci-pll.h deleted file mode 100644 index e55dab1d578b..000000000000 --- a/include/linux/platform_data/clk-davinci-pll.h +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PLL clock driver for TI Davinci SoCs - * - * Copyright (C) 2018 David Lechner <david@lechnology.com> - */ - -#ifndef __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ -#define __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ - -#include <linux/regmap.h> - -/** - * davinci_pll_platform_data - * @cfgchip: CFGCHIP syscon regmap - */ -struct davinci_pll_platform_data { - struct regmap *cfgchip; -}; - -#endif /* __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ */ diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index b3c4993e656e..1f4e4f2b89bb 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5044,8 +5044,12 @@ struct ec_response_pd_status { #define PD_EVENT_POWER_CHANGE BIT(1) #define PD_EVENT_IDENTITY_RECEIVED BIT(2) #define PD_EVENT_DATA_SWAP BIT(3) +#define PD_EVENT_TYPEC BIT(4) +#define PD_EVENT_PPM BIT(5) +#define PD_EVENT_INIT BIT(6) + struct ec_response_host_event_status { - uint32_t status; /* PD MCU host event status */ + uint32_t status; /* PD MCU host event status */ } __ec_align4; /* Set USB type-C port role and muxes */ @@ -6105,6 +6109,29 @@ struct ec_response_typec_vdm_response { #undef VDO_MAX_SIZE +/* + * UCSI OPM-PPM commands + * + * These commands are used for communication between OPM and PPM. + * Only UCSI3.0 is tested. + */ + +#define EC_CMD_UCSI_PPM_SET 0x0140 + +/* The data size is stored in the host command protocol header. */ +struct ec_params_ucsi_ppm_set { + uint16_t offset; + uint8_t data[]; +} __ec_align2; + +#define EC_CMD_UCSI_PPM_GET 0x0141 + +/* For 'GET' sub-commands, data will be returned as a raw payload. */ +struct ec_params_ucsi_ppm_get { + uint16_t offset; + uint8_t size; +} __ec_align2; + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index b34ed0cc1f8d..3ec24f445c29 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -42,6 +42,11 @@ #define EC_MAX_RESPONSE_OVERHEAD 32 /* + * ACPI notify value for MKBP host event. + */ +#define ACPI_NOTIFY_CROS_EC_MKBP 0x80 + +/* * EC panic is not covered by the standard (0-F) ACPI notify values. * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF * device specific ACPI notify range. 
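The new EC_CMD_UCSI_PPM_GET host command above can be issued through the existing cros_ec_cmd_xfer_status() helper. A hedged sketch follows; the foo_* wrapper, the offset value and the error handling are illustrative only, while the parameter layout comes from the header.

static int foo_read_ucsi_block(struct cros_ec_device *ec_dev, u8 *out, u8 len)
{
        struct ec_params_ucsi_ppm_get *params;
        struct cros_ec_command *msg;
        int ret;

        msg = kzalloc(sizeof(*msg) + max_t(size_t, sizeof(*params), len),
                      GFP_KERNEL);
        if (!msg)
                return -ENOMEM;

        msg->command = EC_CMD_UCSI_PPM_GET;
        msg->outsize = sizeof(*params);
        msg->insize = len;

        params = (struct ec_params_ucsi_ppm_get *)msg->data;
        params->offset = 0;     /* hypothetical: start of the PPM register block */
        params->size = len;

        ret = cros_ec_cmd_xfer_status(ec_dev, msg);
        if (ret >= 0)
                memcpy(out, msg->data, len);    /* raw payload, per the header comment */

        kfree(msg);
        return ret < 0 ? ret : 0;
}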
@@ -246,6 +251,8 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +int cros_ec_rwsig_continue(struct cros_ec_device *ec_dev); + int cros_ec_query_all(struct cros_ec_device *ec_dev); int cros_ec_get_next_event(struct cros_ec_device *ec_dev, diff --git a/include/linux/platform_data/huawei-gaokun-ec.h b/include/linux/platform_data/huawei-gaokun-ec.h new file mode 100644 index 000000000000..faa15d315128 --- /dev/null +++ b/include/linux/platform_data/huawei-gaokun-ec.h @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Huawei Matebook E Go Embedded Controller + * + * Copyright (C) 2024-2025 Pengyu Luo <mitltlatltl@gmail.com> + */ + +#ifndef __HUAWEI_GAOKUN_EC_H__ +#define __HUAWEI_GAOKUN_EC_H__ + +#define GAOKUN_UCSI_CCI_SIZE 4 +#define GAOKUN_UCSI_MSGI_SIZE 16 +#define GAOKUN_UCSI_READ_SIZE (GAOKUN_UCSI_CCI_SIZE + GAOKUN_UCSI_MSGI_SIZE) +#define GAOKUN_UCSI_WRITE_SIZE 24 /* 8B CTRL, 16B MSGO */ + +#define GAOKUN_UCSI_NO_PORT_UPDATE (-1) + +#define GAOKUN_SMART_CHARGE_DATA_SIZE 4 /* mode, delay, start, end */ + +/* -------------------------------------------------------------------------- */ + +struct gaokun_ec; +struct gaokun_ucsi_reg; +struct notifier_block; + +#define GAOKUN_MOD_NAME "huawei_gaokun_ec" +#define GAOKUN_DEV_PSY "psy" +#define GAOKUN_DEV_UCSI "ucsi" + +/* -------------------------------------------------------------------------- */ +/* Common API */ + +int gaokun_ec_register_notify(struct gaokun_ec *ec, + struct notifier_block *nb); +void gaokun_ec_unregister_notify(struct gaokun_ec *ec, + struct notifier_block *nb); + +int gaokun_ec_read(struct gaokun_ec *ec, const u8 *req, + size_t resp_len, u8 *resp); +int gaokun_ec_write(struct gaokun_ec *ec, const u8 *req); +int gaokun_ec_read_byte(struct gaokun_ec *ec, const u8 *req, u8 *byte); + +/* -------------------------------------------------------------------------- */ +/* API for PSY */ + +int gaokun_ec_psy_multi_read(struct gaokun_ec *ec, u8 reg, + size_t resp_len, u8 *resp); + +static inline int gaokun_ec_psy_read_byte(struct gaokun_ec *ec, + u8 reg, u8 *byte) +{ + return gaokun_ec_psy_multi_read(ec, reg, sizeof(*byte), byte); +} + +static inline int gaokun_ec_psy_read_word(struct gaokun_ec *ec, + u8 reg, u16 *word) +{ + return gaokun_ec_psy_multi_read(ec, reg, sizeof(*word), (u8 *)word); +} + +int gaokun_ec_psy_get_smart_charge(struct gaokun_ec *ec, + u8 resp[GAOKUN_SMART_CHARGE_DATA_SIZE]); +int gaokun_ec_psy_set_smart_charge(struct gaokun_ec *ec, + const u8 req[GAOKUN_SMART_CHARGE_DATA_SIZE]); + +int gaokun_ec_psy_get_smart_charge_enable(struct gaokun_ec *ec, bool *on); +int gaokun_ec_psy_set_smart_charge_enable(struct gaokun_ec *ec, bool on); + +/* -------------------------------------------------------------------------- */ +/* API for UCSI */ + +int gaokun_ec_ucsi_read(struct gaokun_ec *ec, u8 resp[GAOKUN_UCSI_READ_SIZE]); +int gaokun_ec_ucsi_write(struct gaokun_ec *ec, + const u8 req[GAOKUN_UCSI_WRITE_SIZE]); + +int gaokun_ec_ucsi_get_reg(struct gaokun_ec *ec, struct gaokun_ucsi_reg *ureg); +int gaokun_ec_ucsi_pan_ack(struct gaokun_ec *ec, int port_id); + +#endif /* __HUAWEI_GAOKUN_EC_H__ */ diff --git a/include/linux/platform_data/i2c-davinci.h b/include/linux/platform_data/i2c-davinci.h deleted file mode 100644 index 98967df07468..000000000000 --- a/include/linux/platform_data/i2c-davinci.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * DaVinci I2C controller platform_device info - * - * Author: Vladimir Barinov, 
MontaVista Software, Inc. <source@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. -*/ - -#ifndef __ASM_ARCH_I2C_H -#define __ASM_ARCH_I2C_H - -/* All frequencies are expressed in kHz */ -struct davinci_i2c_platform_data { - unsigned int bus_freq; /* standard bus frequency (kHz) */ - unsigned int bus_delay; /* post-transaction delay (usec) */ - bool gpio_recovery; /* Use GPIO recovery method */ - bool has_pfunc; /* Chip has a ICPFUNC register */ -}; - -/* for board setup code */ -void davinci_init_i2c(struct davinci_i2c_platform_data *); - -#endif /* __ASM_ARCH_I2C_H */ diff --git a/include/linux/platform_data/keyscan-davinci.h b/include/linux/platform_data/keyscan-davinci.h deleted file mode 100644 index 260d596ba0af..000000000000 --- a/include/linux/platform_data/keyscan-davinci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Texas Instruments, Inc - * - * Author: Miguel Aguilar <miguel.aguilar@ridgerun.com> - */ - -#ifndef DAVINCI_KEYSCAN_H -#define DAVINCI_KEYSCAN_H - -#include <linux/io.h> - -enum davinci_matrix_types { - DAVINCI_KEYSCAN_MATRIX_4X4, - DAVINCI_KEYSCAN_MATRIX_5X3, -}; - -struct davinci_ks_platform_data { - int (*device_enable)(struct device *dev); - unsigned short *keymap; - u32 keymapsize; - u8 rep:1; - u8 strobe; - u8 interval; - u8 matrix_type; -}; - -#endif - diff --git a/include/linux/platform_data/syscon.h b/include/linux/platform_data/syscon.h deleted file mode 100644 index 2c089dd3e2bd..000000000000 --- a/include/linux/platform_data/syscon.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLATFORM_DATA_SYSCON_H -#define PLATFORM_DATA_SYSCON_H - -struct syscon_platform_data { - const char *label; -}; - -#endif diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h new file mode 100644 index 000000000000..1d34435b7001 --- /dev/null +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel Core SoC Power Management Controller Header File + * + * Copyright (c) 2025, Intel Corporation. + * All Rights Reserved. + * + */ +#ifndef INTEL_PMC_IPC_H +#define INTEL_PMC_IPC_H +#include <linux/acpi.h> + +#define IPC_SOC_REGISTER_ACCESS 0xAA +#define IPC_SOC_SUB_CMD_READ 0x00 +#define IPC_SOC_SUB_CMD_WRITE 0x01 +#define PMC_IPCS_PARAM_COUNT 7 +#define VALID_IPC_RESPONSE 5 + +struct pmc_ipc_cmd { + u32 cmd; + u32 sub_cmd; + u32 size; + u32 wbuf[4]; +}; + +struct pmc_ipc_rbuf { + u32 buf[4]; +}; + +/** + * intel_pmc_ipc() - PMC IPC Mailbox accessor + * @ipc_cmd: Prepared input command to send + * @rbuf: Allocated array for returned IPC data + * + * Return: 0 on success. 
Non-zero on mailbox error + */ +static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) +{ +#ifdef CONFIG_ACPI + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object params[PMC_IPCS_PARAM_COUNT] = { + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + }; + struct acpi_object_list arg_list = { PMC_IPCS_PARAM_COUNT, params }; + union acpi_object *obj; + int status; + + if (!ipc_cmd || !rbuf) + return -EINVAL; + + /* + * 0: IPC Command + * 1: IPC Sub Command + * 2: Size + * 3-6: Write Buffer for offset + */ + params[0].integer.value = ipc_cmd->cmd; + params[1].integer.value = ipc_cmd->sub_cmd; + params[2].integer.value = ipc_cmd->size; + params[3].integer.value = ipc_cmd->wbuf[0]; + params[4].integer.value = ipc_cmd->wbuf[1]; + params[5].integer.value = ipc_cmd->wbuf[2]; + params[6].integer.value = ipc_cmd->wbuf[3]; + + status = acpi_evaluate_object(NULL, "\\IPCS", &arg_list, &buffer); + if (ACPI_FAILURE(status)) + return -ENODEV; + + obj = buffer.pointer; + + if (obj && obj->type == ACPI_TYPE_PACKAGE && + obj->package.count == VALID_IPC_RESPONSE) { + const union acpi_object *objs = obj->package.elements; + + if ((u8)objs[0].integer.value != 0) + return -EINVAL; + + rbuf->buf[0] = objs[1].integer.value; + rbuf->buf[1] = objs[2].integer.value; + rbuf->buf[2] = objs[3].integer.value; + rbuf->buf[3] = objs[4].integer.value; + } else { + return -EINVAL; + } + + return 0; +#else + return -ENODEV; +#endif /* CONFIG_ACPI */ +} + +#endif /* INTEL_PMC_IPC_H */ diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h index 752c06b47cc8..f0349edb47f4 100644 --- a/include/linux/platform_data/x86/pwm-lpss.h +++ b/include/linux/platform_data/x86/pwm-lpss.h @@ -15,9 +15,36 @@ struct pwm_lpss_boardinfo { unsigned int npwm; unsigned long base_unit_bits; /* - * Some versions of the IP may stuck in the state machine if enable - * bit is not set, and hence update bit will show busy status till - * the reset. For the rest it may be otherwise. + * NOTE: + * Intel Broxton, Apollo Lake, and Gemini Lake have different programming flow. + * + * Initial Enable or First Activation + * 1. Program the base unit and on time divisor values. + * 2. Set the software update bit. + * 3. Poll in a loop on the PWMCTRL bit until software update bit is cleared.+ + * 4. Enable the PWM output by setting PWM Enable. + * 5. Repeat the above steps for the next PWM Module. + * + * Dynamic update while PWM is Enabled + * 1. Program the base unit and on-time divisor values. + * 2. Set the software update bit. + * 3. Repeat the above steps for the next PWM module. + * + * + After setting PWMCTRL register's SW update bit, hardware automatically + * deasserts the SW update bit after a brief delay. It was observed that + * setting of PWM enable is typically done via read-modify-write of the PWMCTRL + * register. If there is no/little delay between setting software update bit + * and setting enable bit via read-modify-write, it is possible that the read + * could return with software enable as 1. In that case, the last write to set + * enable to 1 could also set sw_update to 1. If this happens, sw_update gets + * stuck and the driver code can hang as it explicitly waits for sw_update bit + * to be 0 after setting the enable bit to 1. 
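A hedged usage sketch for the intel_pmc_ipc() accessor defined above; the wrapper name is invented, and the interpretation of the size field and write buffer as "one register at the given offset" is an assumption, only the command and sub-command encoding come from the header.

static int foo_pmc_read_reg(u32 offset, u32 *value)
{
        struct pmc_ipc_cmd cmd = {
                .cmd     = IPC_SOC_REGISTER_ACCESS,
                .sub_cmd = IPC_SOC_SUB_CMD_READ,
                .size    = 1,                   /* assumption: one 32-bit register */
                .wbuf    = { offset },          /* offset goes in the write buffer */
        };
        struct pmc_ipc_rbuf rbuf = {};
        int ret;

        ret = intel_pmc_ipc(&cmd, &rbuf);
        if (ret)
                return ret;

        *value = rbuf.buf[0];
        return 0;
}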
To avoid this race condition, + * SW should poll on the software update bit to make sure that it is 0 before + * doing the read-modify-write to set the enable bit to 1. + * + * Also, we noted that if sw_update bit was set in step #1 above then when it + * is set again in step #2, sw_update bit never gets cleared and the flow hangs. + * As such, we need to make sure that sw_update bit is 0 when doing step #1. */ bool bypass; /* diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index f5492ed413f3..a299225ab92e 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -9,6 +9,7 @@ #ifndef _PLATFORM_PROFILE_H_ #define _PLATFORM_PROFILE_H_ +#include <linux/device.h> #include <linux/bitops.h> /* @@ -23,20 +24,37 @@ enum platform_profile_option { PLATFORM_PROFILE_BALANCED, PLATFORM_PROFILE_BALANCED_PERFORMANCE, PLATFORM_PROFILE_PERFORMANCE, + PLATFORM_PROFILE_CUSTOM, PLATFORM_PROFILE_LAST, /*must always be last */ }; -struct platform_profile_handler { - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; - int (*profile_get)(struct platform_profile_handler *pprof, - enum platform_profile_option *profile); - int (*profile_set)(struct platform_profile_handler *pprof, - enum platform_profile_option profile); +/** + * struct platform_profile_ops - platform profile operations + * @probe: Callback to setup choices available to the new class device. These + * choices will only be enforced when setting a new profile, not when + * getting the current one. + * @hidden_choices: Callback to setup choices that are not visible to the user + * but can be set by the driver. + * @profile_get: Callback that will be called when showing the current platform + * profile in sysfs. + * @profile_set: Callback that will be called when storing a new platform + * profile in sysfs. + */ +struct platform_profile_ops { + int (*probe)(void *drvdata, unsigned long *choices); + int (*hidden_choices)(void *drvdata, unsigned long *choices); + int (*profile_get)(struct device *dev, enum platform_profile_option *profile); + int (*profile_set)(struct device *dev, enum platform_profile_option profile); }; -int platform_profile_register(struct platform_profile_handler *pprof); -int platform_profile_remove(void); +struct device *platform_profile_register(struct device *dev, const char *name, + void *drvdata, + const struct platform_profile_ops *ops); +void platform_profile_remove(struct device *dev); +struct device *devm_platform_profile_register(struct device *dev, const char *name, + void *drvdata, + const struct platform_profile_ops *ops); int platform_profile_cycle(void); -void platform_profile_notify(void); +void platform_profile_notify(struct device *dev); #endif /*_PLATFORM_PROFILE_H_*/ diff --git a/include/linux/pldmfw.h b/include/linux/pldmfw.h index 0fc831338226..f5047983004f 100644 --- a/include/linux/pldmfw.h +++ b/include/linux/pldmfw.h @@ -125,9 +125,17 @@ struct pldmfw_ops; * a pointer to their own data, used to implement the device specific * operations. 
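The reworked platform-profile API above is class-device based: the driver fills in the choices bitmap from its probe callback and registers with devm_platform_profile_register(). A minimal, hypothetical sketch (foo_* names invented, error handling trimmed):

static int foo_profile_probe(void *drvdata, unsigned long *choices)
{
        set_bit(PLATFORM_PROFILE_LOW_POWER, choices);
        set_bit(PLATFORM_PROFILE_BALANCED, choices);
        set_bit(PLATFORM_PROFILE_PERFORMANCE, choices);
        return 0;
}

static int foo_profile_get(struct device *dev, enum platform_profile_option *profile)
{
        /* assumption: drvdata passed at registration is retrievable here */
        struct foo_priv *priv = dev_get_drvdata(dev);

        *profile = priv->cur_profile;           /* hypothetical driver state */
        return 0;
}

static int foo_profile_set(struct device *dev, enum platform_profile_option profile)
{
        return foo_write_ec_profile(dev_get_drvdata(dev), profile);    /* hypothetical */
}

static const struct platform_profile_ops foo_profile_ops = {
        .probe          = foo_profile_probe,
        .profile_get    = foo_profile_get,
        .profile_set    = foo_profile_set,
};

/* in the platform driver's probe(): */
/*      ppdev = devm_platform_profile_register(&pdev->dev, "foo", priv, &foo_profile_ops); */
/*      if (IS_ERR(ppdev))                                                                 */
/*              return PTR_ERR(ppdev);                                                     */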
*/ + +enum pldmfw_update_mode { + PLDMFW_UPDATE_MODE_FULL, + PLDMFW_UPDATE_MODE_SINGLE_COMPONENT, +}; + struct pldmfw { const struct pldmfw_ops *ops; struct device *dev; + u16 component_identifier; + enum pldmfw_update_mode mode; }; bool pldmfw_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record); diff --git a/include/linux/pm.h b/include/linux/pm.h index e7f0260f15ad..f0bd8fbae4f2 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns") #else #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) #endif #ifdef CONFIG_PM_SLEEP @@ -570,7 +566,8 @@ const struct dev_pm_ops name = { \ { .event = PM_EVENT_AUTO_RESUME, }) #define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0) - +#define PMSG_NO_WAKEUP(msg) (((msg).event & \ + (PM_EVENT_FREEZE | PM_EVENT_QUIESCE)) != 0) /* * Device run-time power management status. * @@ -600,6 +597,7 @@ enum rpm_status { RPM_RESUMING, RPM_SUSPENDED, RPM_SUSPENDING, + RPM_BLOCKED, }; /* @@ -681,7 +679,8 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ - bool async_in_progress:1; /* Owned by the PM core */ + bool work_in_progress:1; /* Owned by the PM core */ + bool smart_suspend:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ #else @@ -840,10 +839,8 @@ extern int pm_generic_resume_early(struct device *dev); extern int pm_generic_resume_noirq(struct device *dev); extern int pm_generic_resume(struct device *dev); extern int pm_generic_freeze_noirq(struct device *dev); -extern int pm_generic_freeze_late(struct device *dev); extern int pm_generic_freeze(struct device *dev); extern int pm_generic_thaw_noirq(struct device *dev); -extern int pm_generic_thaw_early(struct device *dev); extern int pm_generic_thaw(struct device *dev); extern int pm_generic_restore_noirq(struct device *dev); extern int pm_generic_restore_early(struct device *dev); @@ -885,10 +882,8 @@ static inline void dpm_for_each_dev(void *data, void (*fn)(struct device *, void #define pm_generic_resume_noirq NULL #define pm_generic_resume NULL #define pm_generic_freeze_noirq NULL -#define pm_generic_freeze_late NULL #define pm_generic_freeze NULL #define pm_generic_thaw_noirq NULL -#define pm_generic_thaw_early NULL #define pm_generic_thaw NULL #define pm_generic_restore_noirq NULL #define pm_generic_restore_early NULL diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index 68669ce18720..c3b46fa358d3 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -41,9 +41,7 @@ extern int pm_clk_create(struct device *dev); extern void pm_clk_destroy(struct device *dev); extern int pm_clk_add(struct device *dev, const char *con_id); extern int pm_clk_add_clk(struct device *dev, struct clk *clk); -extern int of_pm_clk_add_clk(struct device *dev, const char *name); extern int of_pm_clk_add_clks(struct device *dev); -extern void pm_clk_remove(struct device *dev, const char *con_id); extern void pm_clk_remove_clk(struct device *dev, struct clk *clk); extern int pm_clk_suspend(struct device *dev); extern int pm_clk_resume(struct device *dev); @@ -76,9 
+74,6 @@ static inline int of_pm_clk_add_clks(struct device *dev) { return -EINVAL; } -static inline void pm_clk_remove(struct device *dev, const char *con_id) -{ -} #define pm_clk_suspend NULL #define pm_clk_resume NULL static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 45646bfcaf1a..d56a78af4af1 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -147,6 +147,7 @@ struct genpd_governor_data { }; struct genpd_power_state { + const char *name; s64 power_off_latency_ns; s64 power_on_latency_ns; s64 residency_ns; @@ -260,6 +261,7 @@ struct generic_pm_domain_data { unsigned int rpm_pstate; unsigned int opp_token; bool hw_mode; + bool rpm_always_on; void *data; }; @@ -292,6 +294,7 @@ ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); void dev_pm_genpd_synced_poweroff(struct device *dev); int dev_pm_genpd_set_hwmode(struct device *dev, bool enable); bool dev_pm_genpd_get_hwmode(struct device *dev); +int dev_pm_genpd_rpm_always_on(struct device *dev, bool on); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -375,6 +378,11 @@ static inline bool dev_pm_genpd_get_hwmode(struct device *dev) return false; } +static inline int dev_pm_genpd_rpm_always_on(struct device *dev, bool on) +{ + return -EOPNOTSUPP; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 568183e3e641..c247317aae38 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -100,8 +100,11 @@ struct dev_pm_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); +void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); +unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index); + unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies); @@ -158,6 +161,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, unsigned int *bw, int index); +void dev_pm_opp_get(struct dev_pm_opp *opp); void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); @@ -203,8 +207,15 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device * return ERR_PTR(-EOPNOTSUPP); } +static inline void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) {} + static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} +static inline unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index) +{ + return 0; +} + static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { return 0; @@ -334,6 +345,8 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline void dev_pm_opp_get(struct dev_pm_opp *opp) {} + static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} static inline int diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d39dc863f612..756b842dcd30 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -66,6 +66,7 @@ static inline bool queue_pm_work(struct 
work_struct *work) extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); +extern bool pm_runtime_need_not_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); @@ -77,6 +78,8 @@ extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); +extern bool pm_runtime_block_if_disabled(struct device *dev); +extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); @@ -93,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); +int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); +int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. @@ -197,6 +202,17 @@ static inline bool pm_runtime_enabled(struct device *dev) } /** + * pm_runtime_blocked - Check if runtime PM enabling is blocked. + * @dev: Target device. + * + * Do not call this function outside system suspend/resume code paths. + */ +static inline bool pm_runtime_blocked(struct device *dev) +{ + return dev->power.last_status == RPM_BLOCKED; +} + +/** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * @@ -241,6 +257,7 @@ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } +static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } @@ -271,12 +288,17 @@ static inline int pm_runtime_get_if_active(struct device *dev) static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } +static inline bool pm_runtime_block_if_disabled(struct device *dev) { return true; } +static inline void pm_runtime_unblock(struct device *dev) {} static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} +static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } +static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} @@ -556,11 +578,18 @@ static inline int pm_runtime_set_suspended(struct device *dev) * pm_runtime_disable - Disable runtime PM for a device. 
* @dev: Target device. * - * Prevent the runtime PM framework from working with @dev (by incrementing its - * "blocking" counter). + * Prevent the runtime PM framework from working with @dev by incrementing its + * "disable" counter. + * + * If the counter is zero when this function runs and there is a pending runtime + * resume request for @dev, it will be resumed. If the counter is still zero at + * that point, all of the pending runtime PM requests for @dev will be canceled + * and all runtime PM operations in progress involving it will be waited for to + * complete. * - * For each invocation of this function for @dev there must be a matching - * pm_runtime_enable() call in order for runtime PM to be enabled for it. + * For each invocation of this function for @dev, there must be a matching + * pm_runtime_enable() call, so that runtime PM is eventually enabled for it + * again. */ static inline void pm_runtime_disable(struct device *dev) { diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index d9642c6cf852..25b63ed51b76 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -10,6 +10,7 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq); extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq); extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq); extern void dev_pm_clear_wake_irq(struct device *dev); +extern int devm_pm_set_wake_irq(struct device *dev, int irq); #else /* !CONFIG_PM */ @@ -32,5 +33,10 @@ static inline void dev_pm_clear_wake_irq(struct device *dev) { } +static inline int devm_pm_set_wake_irq(struct device *dev, int irq) +{ + return 0; +} + #endif /* CONFIG_PM */ #endif /* _LINUX_PM_WAKEIRQ_H */ diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 222f7530806c..51e0e8dd5f9e 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -205,17 +205,17 @@ static inline void device_set_awake_path(struct device *dev) static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { - return pm_wakeup_ws_event(ws, msec, false); + pm_wakeup_ws_event(ws, msec, false); } static inline void pm_wakeup_event(struct device *dev, unsigned int msec) { - return pm_wakeup_dev_event(dev, msec, false); + pm_wakeup_dev_event(dev, msec, false); } static inline void pm_wakeup_hard_event(struct device *dev) { - return pm_wakeup_dev_event(dev, 0, true); + pm_wakeup_dev_event(dev, 0, true); } /** @@ -240,4 +240,21 @@ static inline int device_init_wakeup(struct device *dev, bool enable) return 0; } +static void device_disable_wakeup(void *dev) +{ + device_init_wakeup(dev, false); +} + +/** + * devm_device_init_wakeup - Resource managed device wakeup initialization. + * @dev: Device to handle. + * + * This function is the devm managed version of device_init_wakeup(dev, true). + */ +static inline int devm_device_init_wakeup(struct device *dev) +{ + device_init_wakeup(dev, true); + return devm_add_action_or_reset(dev, device_disable_wakeup, dev); +} + #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h index fa9f08164c36..884040e1383b 100644 --- a/include/linux/pmbus.h +++ b/include/linux/pmbus.h @@ -73,6 +73,20 @@ */ #define PMBUS_USE_COEFFICIENTS_CMD BIT(5) +/* + * PMBUS_OP_PROTECTED + * Set if the chip OPERATION command is protected and protection is not + * determined by the standard WRITE_PROTECT command. 
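The new devm helpers above compose naturally in a driver probe path. A hedged sketch, with the driver and IRQ wiring purely illustrative:

static int foo_probe(struct platform_device *pdev)
{
        struct device *dev = &pdev->dev;
        int irq, ret;

        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;

        /* devm-managed equivalent of device_init_wakeup(dev, true) */
        ret = devm_device_init_wakeup(dev);
        if (ret)
                return ret;

        /* the wake IRQ is cleared automatically when the driver detaches */
        return devm_pm_set_wake_irq(dev, irq);
}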
+ */ +#define PMBUS_OP_PROTECTED BIT(6) + +/* + * PMBUS_VOUT_PROTECTED + * Set if the chip VOUT_COMMAND command is protected and protection is not + * determined by the standard WRITE_PROTECT command. + */ +#define PMBUS_VOUT_PROTECTED BIT(7) + struct pmbus_platform_data { u32 flags; /* Device specific flags */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b7a7158aaf65..23fe3eaf242d 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -290,7 +290,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) } struct pnp_fixup { - char id[7]; + char id[8]; void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; diff --git a/include/linux/poison.h b/include/linux/poison.h index 331a9a996fa8..8ca2235f78d5 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -70,6 +70,10 @@ #define KEY_DESTROY 0xbd /********** net/core/page_pool.c **********/ +/* + * page_pool uses additional free bits within this value to store data, see the + * definition of PP_DMA_INDEX_MASK in mm.h + */ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) /********** net/core/skbuff.c **********/ diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index ef8619f48920..a500d3160fe8 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -95,10 +95,13 @@ struct posix_clock { * struct posix_clock_context - represents clock file operations context * * @clk: Pointer to the clock + * @fp: Pointer to the file used to open the clock * @private_clkdata: Pointer to user data * * Drivers should use struct posix_clock_context during specific character - * device file operation methods to access the posix clock. + * device file operation methods to access the posix clock. In particular, + * the file pointer can be used to verify correct access mode for ioctl() + * calls. * * Drivers can store a private data structure during the open operation * if they have specific information that is required in other file @@ -106,6 +109,7 @@ struct posix_clock { */ struct posix_clock_context { struct posix_clock *clk; + struct file *fp; void *private_clkdata; }; diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f11f10c97bd9..dd48c64b605e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q); void posixtimer_send_sigqueue(struct k_itimer *tmr); bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); void posixtimer_free_timer(struct k_itimer *timer); +long posixtimer_create_prctl(unsigned long ctrl); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { } static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq) { return false; } static inline void posixtimer_free_timer(struct k_itimer *timer) { } +static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK @@ -177,23 +179,26 @@ static inline void posix_cputimers_init_work(void) { } * @rcu: RCU head for freeing the timer. 
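The new @fp member documented above lets a posix clock driver verify the open mode before acting on a write-like ioctl. A small sketch, assuming the usual posix clock ioctl callback signature; FOO_CLOCK_SET_TIME and foo_clock_set_time() are invented:

static long foo_clock_ioctl(struct posix_clock_context *pccontext,
                            unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case FOO_CLOCK_SET_TIME:                /* hypothetical ioctl command */
                /* reject clocks that were opened read-only */
                if (!(pccontext->fp->f_mode & FMODE_WRITE))
                        return -EACCES;
                return foo_clock_set_time(pccontext, arg);      /* hypothetical */
        default:
                return -ENOTTY;
        }
}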
*/ struct k_itimer { - struct hlist_node list; - struct hlist_node ignored_list; + /* 1st cacheline contains read-mostly fields */ struct hlist_node t_hash; - spinlock_t it_lock; - const struct k_clock *kclock; - clockid_t it_clock; + struct hlist_node list; timer_t it_id; + clockid_t it_clock; + int it_sigev_notify; + enum pid_type it_pid_type; + struct signal_struct *it_signal; + const struct k_clock *kclock; + + /* 2nd cacheline and above contain fields which are modified regularly */ + spinlock_t it_lock; int it_status; bool it_sig_periodic; s64 it_overrun; s64 it_overrun_last; unsigned int it_signal_seq; unsigned int it_sigqueue_seq; - int it_sigev_notify; - enum pid_type it_pid_type; ktime_t it_interval; - struct signal_struct *it_signal; + struct hlist_node ignored_list; union { struct pid *it_pid; struct task_struct *it_process; @@ -210,7 +215,7 @@ struct k_itimer { } alarm; } it; struct rcu_head rcu; -}; +} ____cacheline_aligned_in_smp; void run_posix_cpu_timers(void); void posix_cpu_timers_exit(struct task_struct *task); @@ -240,6 +245,13 @@ static inline void posixtimer_sigqueue_putref(struct sigqueue *q) posixtimer_putref(tmr); } + +static inline bool posixtimer_valid(const struct k_itimer *timer) +{ + unsigned long val = (unsigned long)timer->it_signal; + + return !(val & 0x1UL); +} #else /* CONFIG_POSIX_TIMERS */ static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { } static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index e2d47eb1a7f3..62d497763e25 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -27,11 +27,16 @@ struct posix_acl_entry { }; struct posix_acl { - refcount_t a_refcount; - unsigned int a_count; - struct rcu_head a_rcu; + /* New members MUST be added within the struct_group() macro below. */ + struct_group_tagged(posix_acl_hdr, hdr, + refcount_t a_refcount; + unsigned int a_count; + struct rcu_head a_rcu; + ); struct posix_acl_entry a_entries[] __counted_by(a_count); }; +static_assert(offsetof(struct posix_acl, a_entries) == sizeof(struct posix_acl_hdr), + "struct member likely outside of struct_group_tagged()"); #define FOREACH_ACL_ENTRY(pa, acl, pe) \ for(pa=(acl)->a_entries, pe=pa+(acl)->a_count; pa<pe; pa++) diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 5180dc9f1706..d56e1276aafe 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -61,6 +61,8 @@ struct bq27xxx_device_info { struct bq27xxx_access_methods bus; struct bq27xxx_reg_cache cache; int charge_design_full; + int voltage_min_design; + int voltage_max_design; bool removed; unsigned long last_update; union power_supply_propval last_status; diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h new file mode 100644 index 000000000000..fdec9af9c541 --- /dev/null +++ b/include/linux/power/max77705_charger.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Maxim MAX77705 definitions. + * + * Copyright (C) 2015 Samsung Electronics, Inc. 
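For the posix_acl change above, struct_group_tagged() gives the fixed-size members a named "hdr" view so they can be copied or bounds-checked separately from the flexible array. A generic sketch of the same pattern, with foo_* names invented:

struct foo {
        /* New members MUST be added within the struct_group() macro below. */
        struct_group_tagged(foo_hdr, hdr,
                u32 count;
                u32 flags;
        );
        u32 entries[] __counted_by(count);
};

static void foo_copy_header(struct foo *dst, const struct foo *src)
{
        /* copies only the header members; the group has a precise type and size */
        dst->hdr = src->hdr;
}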
+ * Copyright (C) 2025 Dzmitry Sankouski <dsankouski@gmail.com> + */ + +#ifndef __MAX77705_CHARGER_H +#define __MAX77705_CHARGER_H __FILE__ + +/* MAX77705_CHG_REG_CHG_INT */ +#define MAX77705_BYP_I BIT(0) +#define MAX77705_INP_LIMIT_I BIT(1) +#define MAX77705_BATP_I BIT(2) +#define MAX77705_BAT_I BIT(3) +#define MAX77705_CHG_I BIT(4) +#define MAX77705_WCIN_I BIT(5) +#define MAX77705_CHGIN_I BIT(6) +#define MAX77705_AICL_I BIT(7) + +/* MAX77705_CHG_REG_CHG_INT_MASK */ +#define MAX77705_BYP_IM BIT(0) +#define MAX77705_INP_LIMIT_IM BIT(1) +#define MAX77705_BATP_IM BIT(2) +#define MAX77705_BAT_IM BIT(3) +#define MAX77705_CHG_IM BIT(4) +#define MAX77705_WCIN_IM BIT(5) +#define MAX77705_CHGIN_IM BIT(6) +#define MAX77705_AICL_IM BIT(7) + +/* MAX77705_CHG_REG_CHG_INT_OK */ +#define MAX77705_BYP_OK BIT(0) +#define MAX77705_DISQBAT_OK BIT(1) +#define MAX77705_BATP_OK BIT(2) +#define MAX77705_BAT_OK BIT(3) +#define MAX77705_CHG_OK BIT(4) +#define MAX77705_WCIN_OK BIT(5) +#define MAX77705_CHGIN_OK BIT(6) +#define MAX77705_AICL_OK BIT(7) + +/* MAX77705_CHG_REG_DETAILS_00 */ +#define MAX77705_BATP_DTLS BIT(0) +#define MAX77705_WCIN_DTLS GENMASK(4, 3) +#define MAX77705_WCIN_DTLS_SHIFT 3 +#define MAX77705_CHGIN_DTLS GENMASK(6, 5) +#define MAX77705_CHGIN_DTLS_SHIFT 5 + +/* MAX77705_CHG_REG_DETAILS_01 */ +#define MAX77705_CHG_DTLS GENMASK(3, 0) +#define MAX77705_CHG_DTLS_SHIFT 0 +#define MAX77705_BAT_DTLS GENMASK(6, 4) +#define MAX77705_BAT_DTLS_SHIFT 4 + +/* MAX77705_CHG_REG_DETAILS_02 */ +#define MAX77705_BYP_DTLS GENMASK(3, 0) +#define MAX77705_BYP_DTLS_SHIFT 0 + +/* MAX77705_CHG_REG_CNFG_00 */ +#define MAX77705_CHG_SHIFT 0 +#define MAX77705_UNO_SHIFT 1 +#define MAX77705_OTG_SHIFT 1 +#define MAX77705_BUCK_SHIFT 2 +#define MAX77705_BOOST_SHIFT 3 +#define MAX77705_WDTEN_SHIFT 4 +#define MAX77705_MODE_MASK GENMASK(3, 0) +#define MAX77705_CHG_MASK BIT(MAX77705_CHG_SHIFT) +#define MAX77705_UNO_MASK BIT(MAX77705_UNO_SHIFT) +#define MAX77705_OTG_MASK BIT(MAX77705_OTG_SHIFT) +#define MAX77705_BUCK_MASK BIT(MAX77705_BUCK_SHIFT) +#define MAX77705_BOOST_MASK BIT(MAX77705_BOOST_SHIFT) +#define MAX77705_WDTEN_MASK BIT(MAX77705_WDTEN_SHIFT) +#define MAX77705_UNO_CTRL (MAX77705_UNO_MASK | MAX77705_BOOST_MASK) +#define MAX77705_OTG_CTRL (MAX77705_OTG_MASK | MAX77705_BOOST_MASK) + +/* MAX77705_CHG_REG_CNFG_01 */ +#define MAX77705_FCHGTIME_SHIFT 0 +#define MAX77705_FCHGTIME_MASK GENMASK(2, 0) +#define MAX77705_CHG_RSTRT_SHIFT 4 +#define MAX77705_CHG_RSTRT_MASK GENMASK(5, 4) +#define MAX77705_FCHGTIME_DISABLE 0 +#define MAX77705_CHG_RSTRT_DISABLE 0x3 + +#define MAX77705_PQEN_SHIFT 7 +#define MAX77705_PQEN_MASK BIT(7) +#define MAX77705_CHG_PQEN_DISABLE 0 +#define MAX77705_CHG_PQEN_ENABLE 1 + +/* MAX77705_CHG_REG_CNFG_02 */ +#define MAX77705_OTG_ILIM_SHIFT 6 +#define MAX77705_OTG_ILIM_MASK GENMASK(7, 6) +#define MAX77705_OTG_ILIM_500 0 +#define MAX77705_OTG_ILIM_900 1 +#define MAX77705_OTG_ILIM_1200 2 +#define MAX77705_OTG_ILIM_1500 3 +#define MAX77705_CHG_CC GENMASK(5, 0) + +/* MAX77705_CHG_REG_CNFG_03 */ +#define MAX77705_TO_ITH_SHIFT 0 +#define MAX77705_TO_ITH_MASK GENMASK(2, 0) +#define MAX77705_TO_TIME_SHIFT 3 +#define MAX77705_TO_TIME_MASK GENMASK(5, 3) +#define MAX77705_SYS_TRACK_DIS_SHIFT 7 +#define MAX77705_SYS_TRACK_DIS_MASK BIT(7) +#define MAX77705_TO_ITH_150MA 0 +#define MAX77705_TO_TIME_30M 3 +#define MAX77705_SYS_TRACK_ENABLE 0 +#define MAX77705_SYS_TRACK_DISABLE 1 + +/* MAX77705_CHG_REG_CNFG_04 */ +#define MAX77705_CHG_MINVSYS_SHIFT 6 +#define MAX77705_CHG_MINVSYS_MASK GENMASK(7, 6) +#define 
MAX77705_CHG_PRM_SHIFT 0 +#define MAX77705_CHG_PRM_MASK GENMASK(5, 0) + +#define MAX77705_CHG_CV_PRM_SHIFT 0 +#define MAX77705_CHG_CV_PRM_MASK GENMASK(5, 0) + +/* MAX77705_CHG_REG_CNFG_05 */ +#define MAX77705_REG_B2SOVRC_SHIFT 0 +#define MAX77705_REG_B2SOVRC_MASK GENMASK(3, 0) +#define MAX77705_B2SOVRC_DISABLE 0 +#define MAX77705_B2SOVRC_4_5A 6 +#define MAX77705_B2SOVRC_4_8A 8 +#define MAX77705_B2SOVRC_5_0A 9 + +/* MAX77705_CHG_CNFG_06 */ +#define MAX77705_WDTCLR_SHIFT 0 +#define MAX77705_WDTCLR_MASK GENMASK(1, 0) +#define MAX77705_WDTCLR 1 +#define MAX77705_CHGPROT_MASK GENMASK(3, 2) +#define MAX77705_CHGPROT_UNLOCKED GENMASK(3, 2) +#define MAX77705_SLOWEST_LX_SLOPE GENMASK(6, 5) + +/* MAX77705_CHG_REG_CNFG_07 */ +#define MAX77705_CHG_FMBST 4 +#define MAX77705_REG_FMBST_SHIFT 2 +#define MAX77705_REG_FMBST_MASK BIT(MAX77705_REG_FMBST_SHIFT) +#define MAX77705_REG_FGSRC_SHIFT 1 +#define MAX77705_REG_FGSRC_MASK BIT(MAX77705_REG_FGSRC_SHIFT) + +/* MAX77705_CHG_REG_CNFG_08 */ +#define MAX77705_REG_FSW_SHIFT 0 +#define MAX77705_REG_FSW_MASK GENMASK(1, 0) +#define MAX77705_CHG_FSW_3MHz 0 +#define MAX77705_CHG_FSW_2MHz 1 +#define MAX77705_CHG_FSW_1_5MHz 2 + +/* MAX77705_CHG_REG_CNFG_09 */ +#define MAX77705_CHG_CHGIN_LIM_MASK GENMASK(6, 0) +#define MAX77705_CHG_EN_MASK BIT(7) +#define MAX77705_CHG_DISABLE 0 +#define MAX77705_CHARGER_CHG_CHARGING(_reg) \ + (((_reg) & MAX77705_CHG_EN_MASK) > 1) + + +/* MAX77705_CHG_REG_CNFG_10 */ +#define MAX77705_CHG_WCIN_LIM GENMASK(5, 0) + +/* MAX77705_CHG_REG_CNFG_11 */ +#define MAX77705_VBYPSET_SHIFT 0 +#define MAX77705_VBYPSET_MASK GENMASK(6, 0) + +/* MAX77705_CHG_REG_CNFG_12 */ +#define MAX77705_CHGINSEL_SHIFT 5 +#define MAX77705_CHGINSEL_MASK BIT(MAX77705_CHGINSEL_SHIFT) +#define MAX77705_WCINSEL_SHIFT 6 +#define MAX77705_WCINSEL_MASK BIT(MAX77705_WCINSEL_SHIFT) +#define MAX77705_VCHGIN_REG_MASK GENMASK(4, 3) +#define MAX77705_WCIN_REG_MASK GENMASK(2, 1) +#define MAX77705_REG_DISKIP_SHIFT 0 +#define MAX77705_REG_DISKIP_MASK BIT(MAX77705_REG_DISKIP_SHIFT) +/* REG=4.5V, UVLO=4.7V */ +#define MAX77705_VCHGIN_4_5 0 +/* REG=4.5V, UVLO=4.7V */ +#define MAX77705_WCIN_4_5 0 +#define MAX77705_DISABLE_SKIP 1 +#define MAX77705_AUTO_SKIP 0 + +/* uA */ +#define MAX77705_CURRENT_CHGIN_STEP 25000 +#define MAX77705_CURRENT_CHG_STEP 50000 +#define MAX77705_CURRENT_CHGIN_MIN 100000 +#define MAX77705_CURRENT_CHGIN_MAX 3200000 + +struct max77705_charger_data { + struct device *dev; + struct regmap *regmap; + struct power_supply_battery_info *bat_info; + struct workqueue_struct *wqueue; + struct work_struct chgin_work; + struct power_supply *psy_chg; +}; + +#endif /* __MAX77705_CHARGER_H */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index b98106e1a90f..888824592953 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -15,6 +15,8 @@ #include <linux/device.h> #include <linux/workqueue.h> #include <linux/leds.h> +#include <linux/rwsem.h> +#include <linux/list.h> #include <linux/spinlock.h> #include <linux/notifier.h> @@ -40,7 +42,7 @@ enum { }; /* What algorithm is the charger using? 
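The input-current constants above (in µA) suggest a linear register encoding for the CHGIN limit field. A hedged helper sketch; the exact code-to-current mapping (code 0 at the 100 mA minimum, 25 mA per step) is an assumption that would need to be checked against the datasheet:

static u8 foo_chgin_limit_to_reg(u32 ua)
{
        ua = clamp_val(ua, MAX77705_CURRENT_CHGIN_MIN, MAX77705_CURRENT_CHGIN_MAX);

        /* assumption: linear code starting at the minimum current */
        return FIELD_PREP(MAX77705_CHG_CHGIN_LIM_MASK,
                          (ua - MAX77705_CURRENT_CHGIN_MIN) / MAX77705_CURRENT_CHGIN_STEP);
}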
*/ -enum { +enum power_supply_charge_type { POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, POWER_SUPPLY_CHARGE_TYPE_NONE, POWER_SUPPLY_CHARGE_TYPE_TRICKLE, /* slow speed */ @@ -58,6 +60,7 @@ enum { POWER_SUPPLY_HEALTH_OVERHEAT, POWER_SUPPLY_HEALTH_DEAD, POWER_SUPPLY_HEALTH_OVERVOLTAGE, + POWER_SUPPLY_HEALTH_UNDERVOLTAGE, POWER_SUPPLY_HEALTH_UNSPEC_FAILURE, POWER_SUPPLY_HEALTH_COLD, POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE, @@ -99,6 +102,7 @@ enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, POWER_SUPPLY_PROP_CHARGE_TYPE, + POWER_SUPPLY_PROP_CHARGE_TYPES, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, @@ -245,6 +249,7 @@ struct power_supply_desc { const char *name; enum power_supply_type type; u8 charge_behaviours; + u32 charge_types; u32 usb_types; const enum power_supply_property *properties; size_t num_properties; @@ -269,7 +274,6 @@ struct power_supply_desc { int (*property_is_writeable)(struct power_supply *psy, enum power_supply_property psp); void (*external_power_changed)(struct power_supply *psy); - void (*set_charged)(struct power_supply *psy); /* * Set if thermal zone should not be created for this power supply. @@ -281,6 +285,28 @@ struct power_supply_desc { int use_for_apm; }; +struct power_supply_ext { + const char *const name; + u8 charge_behaviours; + const enum power_supply_property *properties; + size_t num_properties; + + int (*get_property)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp, + union power_supply_propval *val); + int (*set_property)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp, + const union power_supply_propval *val); + int (*property_is_writeable)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp); +}; + struct power_supply { const struct power_supply_desc *desc; @@ -289,7 +315,6 @@ struct power_supply { char **supplied_from; size_t num_supplies; - struct device_node *of_node; /* Driver private data */ void *drv_data; @@ -300,10 +325,13 @@ struct power_supply { struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; + bool update_groups; bool initialized; bool removing; atomic_t use_cnt; struct power_supply_battery_info *battery_info; + struct rw_semaphore extensions_sem; /* protects "extensions" */ + struct list_head extensions; #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd; struct thermal_cooling_device *tcd; @@ -318,6 +346,8 @@ struct power_supply { #endif }; +#define dev_to_psy(__dev) container_of_const(__dev, struct power_supply, dev) + /* * This is recommended structure to specify static power supply parameters. * Generic one, parametrizable for different power supplies. 
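The power_supply_ext mechanism above lets an auxiliary driver attach extra properties to an existing supply. A hypothetical sketch (foo_* names invented); registration uses power_supply_register_extension(), declared further below in this diff:

static const enum power_supply_property foo_ext_props[] = {
        POWER_SUPPLY_PROP_TEMP,
};

static int foo_ext_get_property(struct power_supply *psy,
                                const struct power_supply_ext *ext,
                                void *data, enum power_supply_property psp,
                                union power_supply_propval *val)
{
        struct foo_sensor *sensor = data;       /* data pointer passed at registration */

        if (psp != POWER_SUPPLY_PROP_TEMP)
                return -EINVAL;

        val->intval = foo_sensor_read_decidegrees(sensor);     /* hypothetical */
        return 0;
}

static const struct power_supply_ext foo_ext = {
        .name           = "foo-temperature",
        .properties     = foo_ext_props,
        .num_properties = ARRAY_SIZE(foo_ext_props),
        .get_property   = foo_ext_get_property,
};

/* later, e.g. in probe():
 *      err = power_supply_register_extension(psy, &foo_ext, dev, sensor);
 */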
Power supply @@ -820,7 +850,6 @@ extern int power_supply_am_i_supplied(struct power_supply *psy); int power_supply_get_property_from_supplier(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); -extern int power_supply_set_battery_charged(struct power_supply *psy); static inline bool power_supply_supports_maintenance_charging(struct power_supply_battery_info *info) @@ -878,10 +907,18 @@ devm_power_supply_register(struct device *parent, extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); +extern int __must_check +power_supply_register_extension(struct power_supply *psy, + const struct power_supply_ext *ext, + struct device *dev, + void *data); +extern void power_supply_unregister_extension(struct power_supply *psy, + const struct power_supply_ext *ext); + #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); +extern int power_supply_for_each_psy(void *data, int (*fn)(struct power_supply *psy, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) { @@ -944,6 +981,11 @@ ssize_t power_supply_charge_behaviour_show(struct device *dev, char *buf); int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf); +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf); +int power_supply_charge_types_parse(unsigned int available_types, const char *buf); #else static inline ssize_t power_supply_charge_behaviour_show(struct device *dev, @@ -959,6 +1001,20 @@ static inline int power_supply_charge_behaviour_parse(unsigned int available_beh { return -EOPNOTSUPP; } + +static inline +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf) +{ + return -EOPNOTSUPP; +} + +static inline int power_supply_charge_types_parse(unsigned int available_types, const char *buf) +{ + return -EOPNOTSUPP; +} #endif #endif /* __LINUX_POWER_SUPPLY_H__ */ diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 45e6e427ceb8..f73fbea0dbc2 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -42,8 +42,7 @@ struct ppp_channel { int hdrlen; /* amount of headroom channel needs */ void *ppp; /* opaque to channel */ int speed; /* transfer rate (bytes/second) */ - /* the following is not used at present */ - int latency; /* overhead time in milliseconds */ + bool direct_xmit; /* no qdisc, xmit directly */ }; #ifdef __KERNEL__ diff --git a/include/linux/pps_gen_kernel.h b/include/linux/pps_gen_kernel.h new file mode 100644 index 000000000000..6214c8aa2e02 --- /dev/null +++ b/include/linux/pps_gen_kernel.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PPS generator API kernel header + * + * Copyright (C) 2024 Rodolfo Giometti <giometti@enneenne.com> + */ + +#ifndef LINUX_PPS_GEN_KERNEL_H +#define LINUX_PPS_GEN_KERNEL_H + +#include <linux/pps_gen.h> +#include <linux/cdev.h> +#include <linux/device.h> + +/* + * Global defines + */ + +#define PPS_GEN_MAX_SOURCES 16 /* should be enough... 
*/ + +struct pps_gen_device; + +/** + * struct pps_gen_source_info - the specific PPS generator info + * @use_system_clock: true, if the system clock is used to generate pulses + * @get_time: query the time stored into the generator clock + * @enable: enable/disable the PPS pulses generation + * + * This is the main generator struct where all needed information must be + * placed before calling the pps_gen_register_source(). + */ +struct pps_gen_source_info { + bool use_system_clock; + + int (*get_time)(struct pps_gen_device *pps_gen, + struct timespec64 *time); + int (*enable)(struct pps_gen_device *pps_gen, bool enable); + +/* private: internal use only */ + struct module *owner; + struct device *parent; /* for device_create */ +}; + +/* The main struct */ +struct pps_gen_device { + const struct pps_gen_source_info *info; /* PSS generator info */ + bool enabled; /* PSS generator status */ + + unsigned int event; + unsigned int sequence; + + unsigned int last_ev; /* last PPS event id */ + wait_queue_head_t queue; /* PPS event queue */ + + unsigned int id; /* PPS generator unique ID */ + struct cdev cdev; + struct device *dev; + struct fasync_struct *async_queue; /* fasync method */ + spinlock_t lock; +}; + +/* + * Global variables + */ + +extern const struct attribute_group *pps_gen_groups[]; + +/* + * Exported functions + */ + +extern struct pps_gen_device *pps_gen_register_source( + const struct pps_gen_source_info *info); +extern void pps_gen_unregister_source(struct pps_gen_device *pps_gen); +extern void pps_gen_event(struct pps_gen_device *pps_gen, + unsigned int event, void *data); + +#endif /* LINUX_PPS_GEN_KERNEL_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index ca86235ac15c..b0af8d4ef6e6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -319,6 +319,7 @@ do { \ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; +struct task_struct; /** * preempt_ops - notifiers called when a task is preempted and rescheduled @@ -515,6 +516,8 @@ static inline bool preempt_model_rt(void) return IS_ENABLED(CONFIG_PREEMPT_RT); } +extern const char *preempt_model_str(void); + /* * Does the preemption model allow non-cooperative preemption? * diff --git a/include/linux/printk.h b/include/linux/printk.h index 4217a9f412b2..5b462029d03c 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -207,6 +207,7 @@ void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); +bool pr_flush(int timeout_ms, bool reset_on_progress); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -315,6 +316,11 @@ static inline void nbcon_atomic_flush_unsafe(void) { } +static inline bool pr_flush(int timeout_ms, bool reset_on_progress) +{ + return true; +} + #endif bool this_cpu_in_panic(void); diff --git a/include/linux/prmt.h b/include/linux/prmt.h index 9c094294403f..c53ab287e932 100644 --- a/include/linux/prmt.h +++ b/include/linux/prmt.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/uuid.h> + #ifdef CONFIG_ACPI_PRMT void init_prmt(void); int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 0b2a89854440..ea62201c74c4 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -20,10 +20,13 @@ enum { * If in doubt, ignore this flag. 
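The PPS generator API introduced above pairs a static pps_gen_source_info with runtime pps_gen_event() notifications. A hedged skeleton of a driver using it; the foo_* helpers are invented and the event value is left symbolic (defined in the UAPI header):

static int foo_gen_get_time(struct pps_gen_device *pps_gen, struct timespec64 *time)
{
        ktime_get_real_ts64(time);              /* pulses derived from the system clock */
        return 0;
}

static int foo_gen_enable(struct pps_gen_device *pps_gen, bool enable)
{
        return foo_hw_set_pulse_output(enable); /* hypothetical hardware helper */
}

static const struct pps_gen_source_info foo_gen_info = {
        .use_system_clock       = true,
        .get_time               = foo_gen_get_time,
        .enable                 = foo_gen_enable,
};

static struct pps_gen_device *foo_gen;

/* in probe():      foo_gen = pps_gen_register_source(&foo_gen_info);  (check the return) */
/* in the pulse interrupt handler:  pps_gen_event(foo_gen, event, NULL);                  */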
*/ #ifdef MODULE - PROC_ENTRY_PERMANENT = 0U, + PROC_ENTRY_PERMANENT = 0U, #else - PROC_ENTRY_PERMANENT = 1U << 0, + PROC_ENTRY_PERMANENT = 1U << 0, #endif + + PROC_ENTRY_proc_read_iter = 1U << 1, + PROC_ENTRY_proc_compat_ioctl = 1U << 2, }; struct proc_ops { diff --git a/include/linux/property.h b/include/linux/property.h index 61fc20e5f81f..e214ecd241eb 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -37,6 +37,7 @@ struct fwnode_handle *__dev_fwnode(struct device *dev); struct device *: __dev_fwnode)(dev) bool device_property_present(const struct device *dev, const char *propname); +bool device_property_read_bool(const struct device *dev, const char *propname); int device_property_read_u8_array(const struct device *dev, const char *propname, u8 *val, size_t nval); int device_property_read_u16_array(const struct device *dev, const char *propname, @@ -54,6 +55,8 @@ int device_property_match_string(const struct device *dev, bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname); +bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, + const char *propname); int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, const char *propname, u8 *val, size_t nval); @@ -207,12 +210,6 @@ int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); unsigned int device_get_child_node_count(const struct device *dev); -static inline bool device_property_read_bool(const struct device *dev, - const char *propname) -{ - return device_property_present(dev, propname); -} - static inline int device_property_read_u8(const struct device *dev, const char *propname, u8 *val) { @@ -263,12 +260,6 @@ static inline int device_property_string_array_count(const struct device *dev, return device_property_read_string_array(dev, propname, NULL, 0); } -static inline bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, - const char *propname) -{ - return fwnode_property_present(fwnode, propname); -} - static inline int fwnode_property_read_u8(const struct fwnode_handle *fwnode, const char *propname, u8 *val) { diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index df1592022d93..c773eeb92d04 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -5,15 +5,37 @@ #ifndef _LINUX_PSE_CONTROLLER_H #define _LINUX_PSE_CONTROLLER_H -#include <linux/ethtool.h> #include <linux/list.h> #include <uapi/linux/ethtool.h> /* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */ #define MAX_PI_CURRENT 1920000 +/* Maximum power in mW according to IEEE 802.3-2022 Table 145-16 */ +#define MAX_PI_PW 99900 struct phy_device; struct pse_controller_dev; +struct netlink_ext_ack; + +/* C33 PSE extended state and substate. */ +struct ethtool_c33_pse_ext_state_info { + enum ethtool_c33_pse_ext_state c33_pse_ext_state; + union { + enum ethtool_c33_pse_ext_substate_error_condition error_condition; + enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; + enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; + enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; + enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; + enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; + enum ethtool_c33_pse_ext_substate_short_detected short_detected; + u32 __c33_pse_ext_substate; + }; +}; + +struct ethtool_c33_pse_pw_limit_range { + u32 min; + u32 max; +}; /** * struct pse_control_config - PSE control/channel configuration. 
@@ -29,7 +51,52 @@ struct pse_control_config { }; /** - * struct pse_control_status - PSE control/channel status. + * struct pse_admin_state - PSE operational state + * + * @podl_admin_state: operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @c33_admin_state: operational state of the PSE + * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState + */ +struct pse_admin_state { + enum ethtool_podl_pse_admin_state podl_admin_state; + enum ethtool_c33_pse_admin_state c33_admin_state; +}; + +/** + * struct pse_pw_status - PSE power detection status + * + * @podl_pw_status: power detection status of the PoDL PSE. + * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @c33_pw_status: power detection status of the PSE. + * IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: + */ +struct pse_pw_status { + enum ethtool_podl_pse_pw_d_status podl_pw_status; + enum ethtool_c33_pse_pw_d_status c33_pw_status; +}; + +/** + * struct pse_ext_state_info - PSE extended state information + * + * @c33_ext_state_info: extended state information of the PSE + */ +struct pse_ext_state_info { + struct ethtool_c33_pse_ext_state_info c33_ext_state_info; +}; + +/** + * struct pse_pw_limit_ranges - PSE power limit configuration range + * + * @c33_pw_limit_ranges: supported power limit configuration range. The driver + * is in charge of the memory allocation. + */ +struct pse_pw_limit_ranges { + struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; +}; + +/** + * struct ethtool_pse_control_status - PSE control/channel status. * * @podl_admin_state: operational state of the PoDL PSE * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState @@ -47,11 +114,11 @@ struct pse_control_config { * @c33_avail_pw_limit: available power limit of the PSE in mW * IEEE 802.3-2022 145.2.5.4 pse_avail_pwr * @c33_pw_limit_ranges: supported power limit configuration range. The driver - * is in charge of the memory allocation. + * is in charge of the memory allocation * @c33_pw_limit_nb_ranges: number of supported power limit configuration * ranges */ -struct pse_control_status { +struct ethtool_pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; enum ethtool_c33_pse_admin_state c33_admin_state; @@ -67,23 +134,35 @@ struct pse_control_status { /** * struct pse_controller_ops - PSE controller driver callbacks * - * @ethtool_get_status: get PSE control status for ethtool interface * @setup_pi_matrix: setup PI matrix of the PSE controller - * @pi_is_enabled: Return 1 if the PSE PI is enabled, 0 if not. - * May also return negative errno. + * @pi_get_admin_state: Get the operational state of the PSE PI. This ops + * is mandatory. + * @pi_get_pw_status: Get the power detection status of the PSE PI. This + * ops is mandatory. + * @pi_get_ext_state: Get the extended state of the PSE PI. + * @pi_get_pw_class: Get the power class of the PSE PI. + * @pi_get_actual_pw: Get actual power of the PSE PI in mW. * @pi_enable: Configure the PSE PI as enabled. * @pi_disable: Configure the PSE PI as disabled. * @pi_get_voltage: Return voltage similarly to get_voltage regulator - * callback. - * @pi_get_pw_limit: Get the configured power limit of the PSE PI. - * @pi_set_pw_limit: Configure the power limit of the PSE PI. + * callback in uV. + * @pi_get_pw_limit: Get the configured power limit of the PSE PI in mW. + * @pi_set_pw_limit: Configure the power limit of the PSE PI in mW. 
+ * @pi_get_pw_limit_ranges: Get the supported power limit configuration + * range. The driver is in charge of the memory + * allocation and should return the number of + * ranges. */ struct pse_controller_ops { - int (*ethtool_get_status)(struct pse_controller_dev *pcdev, - unsigned long id, struct netlink_ext_ack *extack, - struct pse_control_status *status); int (*setup_pi_matrix)(struct pse_controller_dev *pcdev); - int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_admin_state)(struct pse_controller_dev *pcdev, int id, + struct pse_admin_state *admin_state); + int (*pi_get_pw_status)(struct pse_controller_dev *pcdev, int id, + struct pse_pw_status *pw_status); + int (*pi_get_ext_state)(struct pse_controller_dev *pcdev, int id, + struct pse_ext_state_info *ext_state_info); + int (*pi_get_pw_class)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_actual_pw)(struct pse_controller_dev *pcdev, int id); int (*pi_enable)(struct pse_controller_dev *pcdev, int id); int (*pi_disable)(struct pse_controller_dev *pcdev, int id); int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id); @@ -91,12 +170,15 @@ struct pse_controller_ops { int id); int (*pi_set_pw_limit)(struct pse_controller_dev *pcdev, int id, int max_mW); + int (*pi_get_pw_limit_ranges)(struct pse_controller_dev *pcdev, int id, + struct pse_pw_limit_ranges *pw_limit_ranges); }; struct module; struct device_node; struct of_phandle_args; struct pse_control; +struct ethtool_pse_control_status; /* PSE PI pairset pinout can either be Alternative A or Alternative B */ enum pse_pi_pairset_pinout { @@ -173,15 +255,13 @@ void pse_control_put(struct pse_control *psec); int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, - struct pse_control_status *status); + struct ethtool_pse_control_status *status); int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config); int pse_ethtool_set_pw_limit(struct pse_control *psec, struct netlink_ext_ack *extack, const unsigned int pw_limit); -int pse_ethtool_get_pw_limit(struct pse_control *psec, - struct netlink_ext_ack *extack); bool pse_has_podl(struct pse_control *psec); bool pse_has_c33(struct pse_control *psec); @@ -199,7 +279,7 @@ static inline void pse_control_put(struct pse_control *psec) static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, - struct pse_control_status *status) + struct ethtool_pse_control_status *status) { return -EOPNOTSUPP; } @@ -218,12 +298,6 @@ static inline int pse_ethtool_set_pw_limit(struct pse_control *psec, return -EOPNOTSUPP; } -static inline int pse_ethtool_get_pw_limit(struct pse_control *psec, - struct netlink_ext_ack *extack) -{ - return -EOPNOTSUPP; -} - static inline bool pse_has_podl(struct pse_control *psec) { return false; diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index 730f77381d55..2503f7625d65 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,6 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; + const struct export_operations *eops; const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 903ddfea8585..f3cad182d4ef 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -815,6 +815,15 @@ struct sev_data_snp_commit { #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** + * sev_module_init - 
perform PSP SEV module initialization + * + * Returns: + * 0 if the PSP module is successfully initialized + * negative value if the PSP module initialization fails + */ +int sev_module_init(void); + +/** * sev_platform_init - perform SEV INIT command * * @args: struct sev_platform_init_args to pass in arguments diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index c892d22ce0a7..0d68d09bedd1 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -307,7 +307,7 @@ static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. * - * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * Returns: a valid pointer on success or PTR_ERR on failure. If PHC * support is missing at the configuration level, this function * returns NULL, and drivers are expected to gracefully handle that * case separately. @@ -445,7 +445,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); * @hwtstamp: timestamp * @vclock_index: phc index of ptp vclock. * - * Returns converted timestamp, or 0 on error. + * Returns: converted timestamp, or 0 on error. */ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index); #else diff --git a/include/linux/pwm.h b/include/linux/pwm.h index a2df509056ac..9ece4e5d3815 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -379,7 +379,7 @@ static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) dev_set_drvdata(&chip->dev, data); } -#if IS_ENABLED(CONFIG_PWM) +#if IS_REACHABLE(CONFIG_PWM) /* PWM consumer APIs */ int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); @@ -661,7 +661,7 @@ struct pwm_lookup { PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, _period, \ _polarity, NULL) -#if IS_ENABLED(CONFIG_PWM) +#if IS_REACHABLE(CONFIG_PWM) void pwm_add_table(struct pwm_lookup *table, size_t num); void pwm_remove_table(struct pwm_lookup *table, size_t num); #else diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 5b67cd03276e..aa29ac53b833 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -267,7 +267,7 @@ struct qed_ll2_ops { int qed_ll2_alloc_if(struct qed_dev *); void qed_ll2_dealloc_if(struct qed_dev *); #else -static const struct qed_ll2_ops qed_ll2_ops_pass = { +static __maybe_unused const struct qed_ll2_ops qed_ll2_ops_pass = { .start = NULL, .stop = NULL, .start_xmit = NULL, diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 7c173aa64e1e..8d2ba3749866 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -211,6 +211,43 @@ rb_add(struct rb_node *node, struct rb_root *tree, } /** + * rb_find_add_cached() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. 
+ */ +static __always_inline struct rb_node * +rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree, + int (*cmp)(const struct rb_node *new, const struct rb_node *exist)) +{ + bool leftmost = true; + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) { + link = &parent->rb_left; + } else if (c > 0) { + link = &parent->rb_right; + leftmost = false; + } else { + return parent; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); + return NULL; +} + +/** * rb_find_add() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 14dfa6008467..1b11926ddd47 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) * way, we must not access it directly */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) +/* + * Return the ->prev pointer of a list_head in an rcu safe way. Don't + * access it directly. + * + * Any list traversed with list_bidir_prev_rcu() must never use + * list_del_rcu(). Doing so will poison the ->prev pointer that + * list_bidir_prev_rcu() relies on, which will result in segfaults. + * To prevent these segfaults, use list_bidir_del_rcu() instead + * of list_del_rcu(). + */ +#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) /** * list_tail_rcu - returns the prev pointer of the head of the list @@ -159,6 +170,39 @@ static inline void list_del_rcu(struct list_head *entry) } /** + * list_bidir_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * In contrast to list_del_rcu() doesn't poison the prev pointer thus + * allowing backwards traversal via list_bidir_prev_rcu(). + * + * Note: list_empty() on entry does not return true after this because + * the entry is in a special undefined state that permits RCU-based + * lockfree reverse traversal. In particular this means that we can not + * poison the forward and backwards pointers that may still be used for + * walking the list. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another list-mutation + * primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on + * this same list. However, it is perfectly legal to run concurrently + * with the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that list_del_rcu() and list_bidir_del_rcu() must not be used on + * the same list. + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_bidir_del_rcu(struct list_head *entry) +{ + __list_del_entry(entry); +} + +/** * hlist_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. 
* diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 48e5c03df1dd..120536f4c6eb 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -95,9 +95,9 @@ static inline void __rcu_read_lock(void) static inline void __rcu_read_unlock(void) { - preempt_enable(); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) rcu_read_unlock_strict(); + preempt_enable(); } static inline int rcu_preempt_depth(void) @@ -121,12 +121,6 @@ void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -#ifdef CONFIG_TASKS_RCU_GENERIC -void rcu_init_tasks_generic(void); -#else -static inline void rcu_init_tasks_generic(void) { } -#endif - #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); @@ -138,7 +132,7 @@ static inline void rcu_sysrq_end(void) { } #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else -static inline void rcu_irq_work_resched(void) { } +static __always_inline void rcu_irq_work_resched(void) { } #endif #ifdef CONFIG_RCU_NOCB_CPU @@ -806,11 +800,9 @@ do { \ * sections, invocation of the corresponding RCU callback is deferred * until after the all the other CPUs exit their critical sections. * - * In v5.0 and later kernels, synchronize_rcu() and call_rcu() also - * wait for regions of code with preemption disabled, including regions of - * code with interrupts or softirqs disabled. In pre-v5.0 kernels, which - * define synchronize_sched(), only code enclosed within rcu_read_lock() - * and rcu_read_unlock() are guaranteed to be waited for. + * Both synchronize_rcu() and call_rcu() also wait for regions of code + * with preemption disabled, including regions of code with interrupts or + * softirqs disabled. * * Note, however, that RCU callbacks are permitted to run concurrently * with new RCU read-side critical sections. One way that this can happen @@ -865,11 +857,10 @@ static __always_inline void rcu_read_lock(void) * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * In almost all situations, rcu_read_unlock() is immune from deadlock. - * In recent kernels that have consolidated synchronize_sched() and - * synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity - * also extends to the scheduler's runqueue and priority-inheritance - * spinlocks, courtesy of the quiescent-state deferral that is carried - * out when rcu_read_unlock() is invoked with interrupts disabled. + * This deadlock immunity also extends to the scheduler's runqueue + * and priority-inheritance spinlocks, courtesy of the quiescent-state + * deferral that is carried out when rcu_read_unlock() is invoked with + * interrupts disabled. * * See rcu_read_lock() for more information. */ @@ -1025,12 +1016,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_POINTER_INITIALIZER(p, v) \ .p = RCU_INITIALIZER(v) -/* - * Does the specified offset indicate that the corresponding rcu_head - * structure can be handled by kvfree_rcu()? - */ -#define __is_kvfree_rcu_offset(offset) ((offset) < 4096) - /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree for double-argument invocations. @@ -1041,11 +1026,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * when they are used in a kernel module, that module must invoke the * high-latency rcu_barrier() function at module-unload time. * - * The kfree_rcu() function handles this issue. 
Rather than encoding a - * function address in the embedded rcu_head structure, kfree_rcu() instead - * encodes the offset of the rcu_head structure within the base structure. - * Because the functions are not allowed in the low-order 4096 bytes of - * kernel virtual memory, offsets up to 4095 bytes can be accommodated. + * The kfree_rcu() function handles this issue. In order to have a universal + * callback function handling different offsets of rcu_head, the callback needs + * to determine the starting address of the freed object, which can be a large + * kmalloc or vmalloc allocation. To allow simply aligning the pointer down to + * page boundary for those, only offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will * be generated in kvfree_rcu_arg_2(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to @@ -1082,14 +1067,23 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) +/* + * In mm/slab_common.c, no suitable header to include here. + */ +void kvfree_call_rcu(struct rcu_head *head, void *ptr); + +/* + * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the + * comment of kfree_rcu() for details. + */ #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ \ - if (___p) { \ - BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \ - kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ - } \ + if (___p) { \ + BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096); \ + kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ + } \ } while (0) #define kvfree_rcu_arg_1(ptr) \ diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 303ab9bee155..4c92d4291cce 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -16,6 +16,9 @@ struct rcu_synchronize { struct rcu_head head; struct completion completion; + + /* This is for debugging. */ + struct rcu_gp_oldstate oldstate; }; void wakeme_after_rcu(struct rcu_head *head); @@ -65,4 +68,15 @@ static inline void cond_resched_rcu(void) #endif } +// Has the current task blocked within its current RCU read-side +// critical section? +static inline bool has_rcu_reader_blocked(void) +{ +#ifdef CONFIG_PREEMPT_RCU + return !list_empty(¤t->rcu_node_entry); +#else + return false; +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index fe42315f667f..f519cd680228 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -90,41 +90,6 @@ static inline void synchronize_rcu_expedited(void) synchronize_rcu(); } -/* - * Add one more declaration of kvfree() here. It is - * not so straight forward to just include <linux/mm.h> - * where it is defined due to getting many compile - * errors caused by that include. - */ -extern void kvfree(const void *addr); - -static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) -{ - if (head) { - call_rcu(head, (rcu_callback_t) ((void *) head - ptr)); - return; - } - - // kvfree_rcu(one_arg) call. 
- might_sleep(); - synchronize_rcu(); - kvfree(ptr); -} - -static inline void kvfree_rcu_barrier(void) -{ - rcu_barrier(); -} - -#ifdef CONFIG_KASAN_GENERIC -void kvfree_call_rcu(struct rcu_head *head, void *ptr); -#else -static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr) -{ - __kvfree_call_rcu(head, ptr); -} -#endif - void rcu_qs(void); static inline void rcu_softirq_qs(void) @@ -164,7 +129,6 @@ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } static inline void rcu_momentary_eqs(void) { } -static inline void kfree_rcu_scheduler_running(void) { } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 27d86d912781..9d2d7bd251d4 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,12 +34,9 @@ static inline void rcu_virt_note_context_switch(void) } void synchronize_rcu_expedited(void); -void kvfree_call_rcu(struct rcu_head *head, void *ptr); -void kvfree_rcu_barrier(void); void rcu_barrier(void); void rcu_momentary_eqs(void); -void kfree_rcu_scheduler_running(void); struct rcu_gp_oldstate { unsigned long rgos_norm; @@ -103,7 +100,7 @@ extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); -#ifndef CONFIG_PREEMPTION +#ifndef CONFIG_PREEMPT_RCU void rcu_all_qs(void); #endif diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 27343424225c..9ad134a04b41 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -4,18 +4,7 @@ #include <linux/rcupdate.h> #include <linux/sched/signal.h> - -/* - * rcuwait provides a way of blocking and waking up a single - * task in an rcu-safe manner. - * - * The only time @task is non-nil is when a user is blocked (or - * checking if it needs to) on a condition, and reset as soon as we - * know that the condition has succeeded and are awoken. - */ -struct rcuwait { - struct task_struct __rcu *task; -}; +#include <linux/types.h> #define __RCUWAIT_INITIALIZER(name) \ { .task = NULL, } diff --git a/include/linux/reboot.h b/include/linux/reboot.h index abcdde4df697..aa08c3bbbf59 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -177,16 +177,38 @@ void ctrl_alt_del(void); extern void orderly_poweroff(bool force); extern void orderly_reboot(void); -void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown); -static inline void hw_protection_reboot(const char *reason, int ms_until_forced) -{ - __hw_protection_shutdown(reason, ms_until_forced, false); -} +/** + * enum hw_protection_action - Hardware protection action + * + * @HWPROT_ACT_DEFAULT: + * The default action should be taken. This is HWPROT_ACT_SHUTDOWN + * by default, but can be overridden. + * @HWPROT_ACT_SHUTDOWN: + * The system should be shut down (powered off) for HW protection. + * @HWPROT_ACT_REBOOT: + * The system should be rebooted for HW protection. 
+ */ +enum hw_protection_action { HWPROT_ACT_DEFAULT, HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT }; -static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) +void __hw_protection_trigger(const char *reason, int ms_until_forced, + enum hw_protection_action action); + +/** + * hw_protection_trigger - Trigger default emergency system hardware protection action + * + * @reason: Reason of emergency shutdown or reboot to be printed. + * @ms_until_forced: Time to wait for orderly shutdown or reboot before + * triggering it. Negative value disables the forced + * shutdown or reboot. + * + * Initiate an emergency system shutdown or reboot in order to protect + * hardware from further damage. The exact action taken is controllable at + * runtime and defaults to shutdown. + */ +static inline void hw_protection_trigger(const char *reason, int ms_until_forced) { - __hw_protection_shutdown(reason, ms_until_forced, true); + __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_DEFAULT); } /* diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 35f039ecb272..80dc023ac2bf 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -87,6 +87,15 @@ * The decrements dec_and_test() and sub_and_test() also provide acquire * ordering on success. * + * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() provide + * acquire and release ordering for cases when the memory occupied by the + * object might be reused to store another object. This is important for the + * cases where secondary validation is required to detect such reuse, e.g. + * SLAB_TYPESAFE_BY_RCU. The secondary validation checks have to happen after + * the refcount is taken, hence acquire order is necessary. Similarly, when the + * object is initialized, all stores to its attributes should be visible before + * the refcount is set, otherwise a stale attribute value might be used by + * another task which succeeds in taking a refcount to the new object. */ #ifndef _LINUX_REFCOUNT_H @@ -126,6 +135,31 @@ static inline void refcount_set(refcount_t *r, int n) } /** + * refcount_set_release - set a refcount's value with release ordering + * @r: the refcount + * @n: value to which the refcount will be set + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides release memory ordering which will order previous memory operations + * against this store. This ensures all updates to this object are visible + * once the refcount is set and stale values from the object previously + * occupying this memory are overwritten with new ones. + * + * This function should be called only after new object is fully initialized. + * After this call the object should be considered visible to other tasks even + * if it was not yet added into an object collection normally used to discover + * it. This is because other tasks might have discovered the object previously + * occupying the same memory and after memory reuse they can succeed in taking + * refcount to the new object and start using it. 
+ */ +static inline void refcount_set_release(refcount_t *r, int n) +{ + atomic_set_release(&r->refs, n); +} + +/** * refcount_read - get a refcount's value * @r: the refcount * @@ -178,6 +212,71 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } +static inline __must_check __signed_wrap +bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp, + int limit) +{ + int old = refcount_read(r); + + do { + if (!old) + break; + + if (i > limit - old) { + if (oldp) + *oldp = old; + return false; + } + } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i)); + + if (oldp) + *oldp = old; + + if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); + + return old; +} + +static inline __must_check bool +__refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit) +{ + return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit); +} + +static inline __must_check __signed_wrap +bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX); +} + +/** + * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0 + * + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides acquire memory ordering on success, it is assumed the caller has + * guaranteed the object memory to be stable (RCU, etc.). It does provide a + * control dependency and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc_not_zero_acquire() should instead be used to increment a + * reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t *r) +{ + return __refcount_add_not_zero_acquire(i, r, NULL); +} + static inline __signed_wrap void __refcount_add(int i, refcount_t *r, int *oldp) { @@ -236,6 +335,32 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) return __refcount_inc_not_zero(r, NULL); } +static inline __must_check bool __refcount_inc_not_zero_acquire(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero_acquire(1, r, oldp); +} + +/** + * refcount_inc_not_zero_acquire - increment a refcount with acquire ordering unless it is 0 + * @r: the refcount to increment + * + * Similar to refcount_inc_not_zero(), but provides acquire memory ordering on + * success. + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides acquire memory ordering on success, it is assumed the caller has + * guaranteed the object memory to be stable (RCU, etc.). It does provide a + * control dependency and thereby orders future stores. See the comment on top. 
+ * + * Return: true if the increment was successful, false otherwise + */ +static inline __must_check bool refcount_inc_not_zero_acquire(refcount_t *r) +{ + return __refcount_inc_not_zero_acquire(r, NULL); +} + static inline void __refcount_inc(refcount_t *r, int *oldp) { __refcount_add(1, r, oldp); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index fd41baccbf3e..d17c5ea3d55d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -506,6 +506,32 @@ struct regmap_range_cfg { unsigned int window_len; }; +/** + * struct regmap_sdw_mbq_cfg - Configuration for Multi-Byte Quantities + * + * @mbq_size: Callback returning the actual size of the given register. + * @deferrable: Callback returning true if the hardware can defer + * transactions to the given register. Deferral should + * only be used by SDCA parts and typically which controls + * are deferrable will be specified in either as a hard + * coded list or from the DisCo tables in the platform + * firmware. + * + * @timeout_us: The time in microseconds after which waiting for a deferred + * transaction should time out. + * @retry_us: The time in microseconds between polls of the function busy + * status whilst waiting for an opportunity to retry a deferred + * transaction. + * + * Provides additional configuration required for SoundWire MBQ register maps. + */ +struct regmap_sdw_mbq_cfg { + int (*mbq_size)(struct device *dev, unsigned int reg); + bool (*deferrable)(struct device *dev, unsigned int reg); + unsigned long timeout_us; + unsigned long retry_us; +}; + struct regmap_async; typedef int (*regmap_hw_write)(void *context, const void *data, @@ -652,6 +678,7 @@ struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, const char *lock_name); struct regmap *__regmap_init_sdw_mbq(struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name); struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, @@ -713,6 +740,7 @@ struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, const char *lock_name); struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name); struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, @@ -942,7 +970,22 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); */ #define regmap_init_sdw_mbq(sdw, config) \ __regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config, \ - sdw, config) + sdw, config, NULL) + +/** + * regmap_init_sdw_mbq_cfg() - Initialise MBQ SDW register map with config + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * @mbq_config: Properties for the MBQ registers + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. 
+ */ +#define regmap_init_sdw_mbq_cfg(sdw, config, mbq_config) \ + __regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config, \ + sdw, config, mbq_config) /** * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave @@ -1155,7 +1198,22 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); */ #define devm_regmap_init_sdw_mbq(sdw, config) \ __regmap_lockdep_wrapper(__devm_regmap_init_sdw_mbq, #config, \ - sdw, config) + sdw, config, NULL) + +/** + * devm_regmap_init_sdw_mbq_cfg() - Initialise managed MBQ SDW register map with config + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * @mbq_config: Properties for the MBQ registers + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sdw_mbq_cfg(sdw, config, mbq_config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sdw_mbq, \ + #config, sdw, config, mbq_config) /** * devm_regmap_init_slimbus() - Initialise managed register map @@ -1244,7 +1302,7 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); -int regmap_multi_reg_read(struct regmap *map, unsigned int *reg, void *val, +int regmap_multi_reg_read(struct regmap *map, const unsigned int *reg, void *val, size_t val_count); int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, @@ -1294,6 +1352,7 @@ bool regmap_can_raw_write(struct regmap *map); size_t regmap_get_raw_read_max(struct regmap *map); size_t regmap_get_raw_write_max(struct regmap *map); +void regcache_sort_defaults(struct reg_default *defaults, unsigned int ndefaults); int regcache_sync(struct regmap *map); int regcache_sync_region(struct regmap *map, unsigned int min, unsigned int max); @@ -1985,6 +2044,12 @@ static inline bool regmap_might_sleep(struct regmap *map) return true; } +static inline void regcache_sort_defaults(struct reg_default *defaults, + unsigned int ndefaults) +{ + WARN_ONCE(1, "regmap API is disabled"); +} + static inline int regcache_sync(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index bcba3935c6f9..56fe2693d9b2 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -233,6 +233,11 @@ int regulator_sync_voltage(struct regulator *regulator); int regulator_set_current_limit(struct regulator *regulator, int min_uA, int max_uA); int regulator_get_current_limit(struct regulator *regulator); +int regulator_get_unclaimed_power_budget(struct regulator *regulator); +int regulator_request_power_budget(struct regulator *regulator, + unsigned int pw_req); +void regulator_free_power_budget(struct regulator *regulator, + unsigned int pw); int regulator_set_mode(struct regulator *regulator, unsigned int mode); unsigned int regulator_get_mode(struct regulator *regulator); @@ -526,6 +531,22 @@ static inline int regulator_get_current_limit(struct regulator *regulator) return 0; } +static inline int regulator_get_unclaimed_power_budget(struct regulator *regulator) +{ + return INT_MAX; +} + +static inline int regulator_request_power_budget(struct regulator *regulator, + unsigned int pw_req) +{ + return -EOPNOTSUPP; +} + +static inline void regulator_free_power_budget(struct regulator *regulator, 
+ unsigned int pw) +{ +} + static inline int regulator_set_mode(struct regulator *regulator, unsigned int mode) { @@ -656,6 +677,12 @@ regulator_is_equal(struct regulator *reg1, struct regulator *reg2) #endif #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_REGULATOR) +struct regulator *__must_check of_regulator_get(struct device *dev, + struct device_node *node, + const char *id); +struct regulator *__must_check devm_of_regulator_get(struct device *dev, + struct device_node *node, + const char *id); struct regulator *__must_check of_regulator_get_optional(struct device *dev, struct device_node *node, const char *id); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 5b66caf1695d..4a216fdba354 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -656,6 +656,8 @@ struct regulator_dev { int cached_err; bool use_cached_err; spinlock_t err_lock; + + int pw_requested_mW; }; /* diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index b3db09a7429b..1fc440c5c4c7 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -113,6 +113,7 @@ struct notification_limit { * @min_uA: Smallest current consumers may set. * @max_uA: Largest current consumers may set. * @ilim_uA: Maximum input current. + * @pw_budget_mW: Power budget for the regulator in mW. * @system_load: Load that isn't captured by any consumer requests. * * @over_curr_limits: Limits for acting on over current. @@ -185,6 +186,7 @@ struct regulation_constraints { int max_uA; int ilim_uA; + int pw_budget_mW; int system_load; /* used for coupled regulators */ diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 243633c8dceb..b427b5873de1 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -10,6 +10,7 @@ enum pca9450_chip_type { PCA9450_TYPE_PCA9450A = 0, PCA9450_TYPE_PCA9450BC, PCA9450_TYPE_PCA9451A, + PCA9450_TYPE_PCA9452, PCA9450_TYPE_AMOUNT, }; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d94abba1c716..880351ca3dfc 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/pid.h> +#include <linux/resctrl_types.h> /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 @@ -25,6 +26,24 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX +/* Walk all possible resources, with variants for only controls or monitors. */ +#define for_each_rdt_resource(_r) \ + for ((_r) = resctrl_arch_get_resource(0); \ + (_r) && (_r)->rid < RDT_NUM_RESOURCES; \ + (_r) = resctrl_arch_get_resource((_r)->rid + 1)) + +#define for_each_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable || (r)->mon_capable) + +#define for_each_alloc_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable) + +#define for_each_mon_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->mon_capable) + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -40,13 +59,42 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) /* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. 
+ * struct pseudo_lock_region - pseudo-lock region information + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs + * @closid: The closid that this pseudo-locked region uses + * @d: RDT domain to which this pseudo-locked region + * belongs + * @cbm: bitmask of the pseudo-locked region + * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread + * completion + * @thread_done: variable used by waitqueue to test if pseudo-locking + * thread completed + * @cpu: core associated with the cache on which the setup code + * will be run + * @line_size: size of the cache lines + * @size: size of pseudo-locked region in bytes + * @kmem: the kernel memory associated with pseudo-locked region + * @minor: minor number of character device associated with this + * region + * @debugfs_dir: pointer to this region's directory in the debugfs + * filesystem + * @pm_reqs: Power management QoS requests related to this region */ -enum resctrl_event_id { - QOS_L3_OCCUP_EVENT_ID = 0x01, - QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, - QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +struct pseudo_lock_region { + struct resctrl_schema *s; + u32 closid; + struct rdt_ctrl_domain *d; + u32 cbm; + wait_queue_head_t lock_thread_wq; + int thread_done; + int cpu; + unsigned int line_size; + unsigned int size; + void *kmem; + unsigned int minor; + struct dentry *debugfs_dir; + struct list_head pm_reqs; }; /** @@ -155,6 +203,7 @@ enum membw_throttle_mode { /** * struct resctrl_membw - Memory bandwidth allocation related data * @min_bw: Minimum memory bandwidth percentage user can request + * @max_bw: Maximum memory bandwidth value, used as the reset value * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale * @arch_needs_linear: True if we can't configure non-linear resources @@ -165,6 +214,7 @@ enum membw_throttle_mode { */ struct resctrl_membw { u32 min_bw; + u32 max_bw; u32 bw_gran; u32 delay_linear; bool arch_needs_linear; @@ -173,7 +223,6 @@ struct resctrl_membw { u32 *mb_map; }; -struct rdt_parse_data; struct resctrl_schema; enum resctrl_scope { @@ -183,6 +232,16 @@ enum resctrl_scope { }; /** + * enum resctrl_schema_fmt - The format user-space provides for a schema. + * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex. + * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number. + */ +enum resctrl_schema_fmt { + RESCTRL_SCHEMA_BITMAP, + RESCTRL_SCHEMA_RANGE, +}; + +/** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine @@ -195,12 +254,10 @@ enum resctrl_scope { * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. - * @data_width: Character width of data when displaying - * @default_ctrl: Specifies default cache cbm or memory B/W percent. - * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values + * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events - * @fflags: flags to choose base and info files + * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth + * monitoring events can be configured. 
* @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { @@ -215,22 +272,25 @@ struct rdt_resource { struct list_head ctrl_domains; struct list_head mon_domains; char *name; - int data_width; - u32 default_ctrl; - const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct resctrl_schema *s, - struct rdt_ctrl_domain *d); + enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; - unsigned long fflags; + unsigned int mbm_cfg_mask; bool cdp_capable; }; +/* + * Get the resource that exists at this level. If the level is not supported + * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES + * will return NULL. + */ +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); + /** * struct resctrl_schema - configuration abilities of a resource presented to * user-space * @list: Member of resctrl_schema_all. * @name: The name to use in the "schemata" file. + * @fmt_str: Format string to show domain value. * @conf_type: Whether this schema is specific to code/data. * @res: The resource structure exported by the architecture to describe * the hardware that is configured by this schema. @@ -241,16 +301,104 @@ struct rdt_resource { struct resctrl_schema { struct list_head list; char name[8]; + const char *fmt_str; enum resctrl_conf_type conf_type; struct rdt_resource *res; u32 num_closid; }; +struct resctrl_cpu_defaults { + u32 closid; + u32 rmid; +}; + +struct resctrl_mon_config_info { + struct rdt_resource *r; + struct rdt_mon_domain *d; + u32 evtid; + u32 mon_config; +}; + +/** + * resctrl_arch_sync_cpu_closid_rmid() - Refresh this CPU's CLOSID and RMID. + * Call via IPI. + * @info: If non-NULL, a pointer to a struct resctrl_cpu_defaults + * specifying the new CLOSID and RMID for tasks in the default + * resctrl ctrl and mon group when running on this CPU. If NULL, + * this CPU is not re-assigned to a different default group. + * + * Propagates reassignment of CPUs and/or tasks to different resctrl groups + * when requested by the resctrl core code. + * + * This function records the per-cpu defaults specified by @info (if any), + * and then reconfigures the CPU's hardware CLOSID and RMID for subsequent + * execution based on @current, in the same way as during a task switch. + */ +void resctrl_arch_sync_cpu_closid_rmid(void *info); + +/** + * resctrl_get_default_ctrl() - Return the default control value for this + * resource. + * @r: The resource whose default control type is queried. + */ +static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r) +{ + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + return BIT_MASK(r->cache.cbm_len) - 1; + case RESCTRL_SCHEMA_RANGE: + return r->membw.max_bw; + } + + return WARN_ON_ONCE(1); +} + /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); + +/** + * resctrl_arch_mon_event_config_write() - Write the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and writes the + * event config_info->mon_config into hardware. + * + * Called via IPI to reach a CPU that is a member of the specified domain. 
+ */ +void resctrl_arch_mon_event_config_write(void *config_info); + +/** + * resctrl_arch_mon_event_config_read() - Read the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and reads the + * hardware config value into config_info->mon_config. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_read(void *config_info); + +/* For use by arch code to remap resctrl's smaller CDP CLOSID range */ +static inline u32 resctrl_get_config_index(u32 closid, + enum resctrl_conf_type type) +{ + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. @@ -314,6 +462,20 @@ static inline void resctrl_arch_rmid_read_context_check(void) } /** + * resctrl_find_domain() - Search for a domain id in a resource domain list. + * @h: The domain list to search. + * @id: The domain id to search for. + * @pos: A pointer to position in the list id should be inserted. + * + * Search the domain list to find the domain id. If the domain id is + * found, return the domain. NULL otherwise. If the domain id is not + * found (and NULL returned) then the first domain with id bigger than + * the input id can be returned to the caller via @pos. + */ +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos); + +/** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. * @r: The domain's resource. @@ -340,7 +502,19 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, */ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +/** + * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its + * default. + * @r: The resctrl resource to reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; +int __init resctrl_init(void); +void __exit resctrl_exit(void); + #endif /* _RESCTRL_H */ diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h new file mode 100644 index 000000000000..f26450b3326b --- /dev/null +++ b/include/linux/resctrl_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Arm Ltd. 
+ * Based on arch/x86/kernel/cpu/resctrl/internal.h + */ + +#ifndef __LINUX_RESCTRL_TYPES_H +#define __LINUX_RESCTRL_TYPES_H + +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + +/* Max event bits supported */ +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) + +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + +#endif /* __LINUX_RESCTRL_TYPES_H */ diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 997b34197385..6816e4c5f3f0 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -241,7 +241,7 @@ bool rfkill_soft_blocked(struct rfkill *rfkill); * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * - * Returns enum rfkill_type that corresponds to the name. + * Returns: enum rfkill_type that corresponds to the name. */ enum rfkill_type rfkill_find_type(const char *name); diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 8463a128e2f4..6c85b28ea30b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1259,7 +1259,7 @@ static inline int rhashtable_replace_fast( static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { - return rhashtable_walk_enter(&hlt->ht, iter); + rhashtable_walk_enter(&hlt->ht, iter); } /** @@ -1275,12 +1275,12 @@ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void *arg), void *arg) { - return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); + rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { - return rhltable_free_and_destroy(hlt, NULL, NULL); + rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 17fbb7855295..56e27263acf8 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -92,10 +92,10 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long range_size, + unsigned long scratch_size, struct lock_class_key *key); -bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, - long *data); +void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size); /* * Because the ring buffer is generic, if other users of the ring buffer get @@ -113,11 +113,11 @@ bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, * traced by ftrace, it can produce lockdep warnings. 
We need to keep each * ring buffer's lock class separate. */ -#define ring_buffer_alloc_range(size, flags, order, start, range_size) \ +#define ring_buffer_alloc_range(size, flags, order, start, range_size, s_size) \ ({ \ static struct lock_class_key __key; \ __ring_buffer_alloc_range((size), (flags), (order), (start), \ - (range_size), &__key); \ + (range_size), (s_size), &__key); \ }) typedef bool (*ring_buffer_cond_fn)(void *data); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 683a04088f3f..6b82b618846e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -13,6 +13,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> +#include <linux/bit_spinlock.h> /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -173,6 +174,214 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, struct anon_vma *folio_get_anon_vma(const struct folio *folio); +#ifdef CONFIG_MM_ID +static __always_inline void folio_lock_large_mapcount(struct folio *folio) +{ + bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static __always_inline void folio_unlock_large_mapcount(struct folio *folio) +{ + __bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static inline unsigned int folio_mm_id(const struct folio *folio, int idx) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + return folio->_mm_id[idx] & MM_ID_MASK; +} + +static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + folio->_mm_id[idx] &= ~MM_ID_MASK; + folio->_mm_id[idx] |= id; +} + +static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio, + int diff, mm_id_t mm_id) +{ + VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio)); + VM_WARN_ON_ONCE(diff <= 0); + VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX); + + /* + * Make sure we can detect at least one complete PTE mapping of the + * folio in a single MM as "exclusively mapped". This is primarily + * a check on 32bit, where we currently reduce the size of the per-MM + * mapcount to a short. + */ + VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio)); + VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY && + folio->_mm_id_mapcount[0] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY && + folio->_mm_id_mapcount[0] < 0); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) == MM_ID_DUMMY && + folio->_mm_id_mapcount[1] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY && + folio->_mm_id_mapcount[1] < 0); + VM_WARN_ON_ONCE(!folio_mapped(folio) && + folio_test_large_maybe_mapped_shared(folio)); +} + +static __always_inline void folio_set_large_mapcount(struct folio *folio, + int mapcount, struct vm_area_struct *vma) +{ + __folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY); + + /* Note: mapcounts start at -1. 
*/ + atomic_set(&folio->_large_mapcount, mapcount - 1); + folio->_mm_id_mapcount[0] = mapcount - 1; + folio_set_mm_id(folio, 0, vma->vm_mm->mm_id); +} + +static __always_inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * If a folio is mapped more than once into an MM on 32bit, we + * can in theory overflow the per-MM mapcount (although only for + * fairly large folios), turning it negative. In that case, just + * free up the slot and mark the folio "mapped shared", otherwise + * we might be in trouble when unmapping pages later. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) { + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) { + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 0, mm_id); + folio->_mm_id_mapcount[0] = diff - 1; + /* We might have other mappings already. */ + if (new_mapcount_val != diff - 1) + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 1, mm_id); + folio->_mm_id_mapcount[1] = diff - 1; + /* Slot 0 certainly has mappings as well. */ + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; +} +#define folio_add_large_mapcount folio_add_return_large_mapcount + +static __always_inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * There are valid corner cases where we might underflow a per-MM + * mapcount (some mappings added when no slot was free, some mappings + * added once a slot was free), so we always set it to -1 once we go + * negative. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] -= diff; + if (folio->_mm_id_mapcount[0] >= 0) + goto out; + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] -= diff; + if (folio->_mm_id_mapcount[1] >= 0) + goto out; + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + } + + /* + * If one MM slot owns all mappings, the folio is mapped exclusively. + * Note that if the folio is now unmapped (new_mapcount_val == -1), both + * slots must be free (mapcount == -1), and we'll also mark it as + * exclusive. 
+ */ + if (folio->_mm_id_mapcount[0] == new_mapcount_val || + folio->_mm_id_mapcount[1] == new_mapcount_val) + folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT; +out: + folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; +} +#define folio_sub_large_mapcount folio_sub_return_large_mapcount +#else /* !CONFIG_MM_ID */ +/* + * See __folio_rmap_sanity_checks(), we might map large folios even without + * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now. + */ +static inline void folio_set_large_mapcount(struct folio *folio, int mapcount, + struct vm_area_struct *vma) +{ + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); +} + +static inline void folio_add_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_add(diff, &folio->_large_mapcount); +} + +static inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} + +static inline void folio_sub_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_sub(diff, &folio->_large_mapcount); +} + +static inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} +#endif /* CONFIG_MM_ID */ + +#define folio_inc_large_mapcount(folio, vma) \ + folio_add_large_mapcount(folio, 1, vma) +#define folio_inc_return_large_mapcount(folio, vma) \ + folio_add_return_large_mapcount(folio, 1, vma) +#define folio_dec_large_mapcount(folio, vma) \ + folio_sub_large_mapcount(folio, 1, vma) +#define folio_dec_return_large_mapcount(folio, vma) \ + folio_sub_return_large_mapcount(folio, 1, vma) + /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -192,6 +401,7 @@ typedef int __bitwise rmap_t; enum rmap_level { RMAP_LEVEL_PTE = 0, RMAP_LEVEL_PMD, + RMAP_LEVEL_PUD, }; static inline void __folio_rmap_sanity_checks(const struct folio *folio, @@ -228,6 +438,14 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; + case RMAP_LEVEL_PUD: + /* + * Assume that we are creating a single "entire" mapping of the + * folio. 
+ */ + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio); + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); + break; default: VM_WARN_ON_ONCE(true); } @@ -251,12 +469,16 @@ void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, folio_add_file_rmap_ptes(folio, page, 1, vma) void folio_add_file_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_add_file_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_remove_rmap_pte(folio, page, vma) \ folio_remove_rmap_ptes(folio, page, 1, vma) void folio_remove_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_remove_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); @@ -322,7 +544,8 @@ static inline void hugetlb_remove_rmap(struct folio *folio) } static __always_inline void __folio_dup_file_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + enum rmap_level level) { const int orig_nr_pages = nr_pages; @@ -335,14 +558,17 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, break; } - do { - atomic_inc(&page->_mapcount); - } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) { + do { + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + } + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; } } @@ -352,45 +578,47 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); } /** * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. 
*/ static inline void folio_dup_file_rmap_pmd(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif } static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma, - enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, enum rmap_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -432,18 +660,20 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; } return 0; @@ -455,6 +685,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mappings are duplicated * * The page range of the folio is defined by [page, page + nr_pages) @@ -473,16 +704,18 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, - RMAP_LEVEL_PTE); + return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, + src_vma, RMAP_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, 1, src_vma, + return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, RMAP_LEVEL_PTE); } @@ -491,6 +724,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mapping is duplicated * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) @@ -509,11 +743,12 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. 
*/ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, - RMAP_LEVEL_PMD); + return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, + src_vma, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -663,9 +898,8 @@ int folio_referenced(struct folio *, int is_locked, void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); -int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, - unsigned long end, struct page **pages, - void *arg); +struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, + void *owner, struct folio **foliop); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) @@ -739,6 +973,9 @@ unsigned long page_address_in_vma(const struct folio *folio, */ int folio_mkclean(struct folio *); +int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff, + unsigned long pfn, unsigned long nr_pages); + int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); diff --git a/include/linux/rolling_buffer.h b/include/linux/rolling_buffer.h new file mode 100644 index 000000000000..ac15b1ffdd83 --- /dev/null +++ b/include/linux/rolling_buffer.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Rolling buffer of folios + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _ROLLING_BUFFER_H +#define _ROLLING_BUFFER_H + +#include <linux/folio_queue.h> +#include <linux/uio.h> + +/* + * Rolling buffer. Whilst the buffer is live and in use, folios and folio + * queue segments can be added to one end by one thread and removed from the + * other end by another thread. The buffer isn't allowed to be empty; it must + * always have at least one folio_queue in it so that neither side has to + * modify both queue pointers. + * + * The iterator in the buffer is extended as buffers are inserted. It can be + * snapshotted to use a segment of the buffer. + */ +struct rolling_buffer { + struct folio_queue *head; /* Producer's insertion point */ + struct folio_queue *tail; /* Consumer's removal point */ + struct iov_iter iter; /* Iterator tracking what's left in the buffer */ + u8 next_head_slot; /* Next slot in ->head */ + u8 first_tail_slot; /* First slot in ->tail */ +}; + +/* + * Snapshot of a rolling buffer. + */ +struct rolling_buffer_snapshot { + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ + unsigned char curr_slot; /* Folio currently being read */ + unsigned char curr_order; /* Order of folio */ +}; + +/* Marks to store per-folio in the internal folio_queue structs. 
*/ +#define ROLLBUF_MARK_1 BIT(0) +#define ROLLBUF_MARK_2 BIT(1) + +int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id, + unsigned int direction); +int rolling_buffer_make_space(struct rolling_buffer *roll); +ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll, + struct readahead_control *ractl, + struct folio_batch *put_batch); +ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio, + unsigned int flags); +struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll); +void rolling_buffer_clear(struct rolling_buffer *roll); + +static inline void rolling_buffer_advance(struct rolling_buffer *roll, size_t amount) +{ + iov_iter_advance(&roll->iter, amount); +} + +#endif /* _ROLLING_BUFFER_H */ diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 3f4d315aaec9..95da051fb155 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -170,6 +170,7 @@ struct rtc_device { /* useful timestamps */ #define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */ +#define RTC_TIMESTAMP_EPOCH_GPS 315964800LL /* 1980-01-06 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2079 3471292799LL /* 2079-12-31 23:59:59 */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 14b88f551920..ea39dd23a197 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -43,6 +43,7 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +extern int rtnl_lock_interruptible(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); @@ -78,7 +79,7 @@ static inline bool lockdep_rtnl_is_held(void) * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit + * Return: the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ @@ -102,6 +103,7 @@ void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_trylock(struct net *net); +int rtnl_net_lock_killable(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); @@ -138,6 +140,11 @@ static inline int rtnl_net_trylock(struct net *net) return rtnl_trylock(); } +static inline int rtnl_net_lock_killable(struct net *net) +{ + return rtnl_lock_killable(); +} + static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); @@ -178,6 +185,12 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); +/* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. 
*/ +struct ndo_fdb_dump_context { + unsigned long ifindex; + unsigned long fdb_idx; +}; + extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, @@ -227,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rv.h b/include/linux/rv.h index 8883b41d88ec..3452b5e4b29e 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,7 +7,7 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H -#define MAX_DA_NAME_LEN 24 +#define MAX_DA_NAME_LEN 32 #ifdef CONFIG_RV /* @@ -56,7 +56,7 @@ struct rv_monitor { bool rv_monitoring_on(void); int rv_unregister_monitor(struct rv_monitor *monitor); -int rv_register_monitor(struct rv_monitor *monitor); +int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent); int rv_get_task_monitor_slot(void); void rv_put_task_monitor_slot(int slot); diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index d836e7440ee8..138e2f1bd08f 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -671,6 +671,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) #define SG_MITER_ATOMIC (1 << 0) /* use kmap_atomic */ #define SG_MITER_TO_SG (1 << 1) /* flush back to phys on unmap */ #define SG_MITER_FROM_SG (1 << 2) /* nop */ +#define SG_MITER_LOCAL (1 << 3) /* use kmap_local */ struct sg_mapping_iter { /* the following three fields can be accessed directly */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 376478d29e2d..f96ac1982893 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,6 +46,7 @@ #include <linux/rv.h> #include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> +#include <linux/tracepoint-defs.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -65,6 +66,7 @@ struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; +struct perf_ctx_data; struct pid_namespace; struct pipe_inode_info; struct rcu_node; @@ -186,6 +188,12 @@ struct user_event_mm; # define debug_rtlock_wait_restore_state() do { } while (0) #endif +#define trace_set_current_state(state_value) \ + do { \ + if (tracepoint_enabled(sched_set_state_tp)) \ + __trace_set_current_state(state_value); \ + } while (0) + /* * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to @@ -226,12 +234,14 @@ struct user_event_mm; #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ + trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ + trace_set_current_state(state_value); \ smp_store_mb(current->__state, (state_value)); \ } while (0) @@ -247,6 +257,7 @@ struct user_event_mm; \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ debug_special_state_change((state_value)); \ + trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) @@ -282,6 +293,7 @@ struct user_event_mm; raw_spin_lock(¤t->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ + 
trace_set_current_state(TASK_RTLOCK_WAIT); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(¤t->pi_lock); \ } while (0); @@ -291,6 +303,7 @@ struct user_event_mm; lockdep_assert_irqs_disabled(); \ raw_spin_lock(¤t->pi_lock); \ debug_rtlock_wait_restore_state(); \ + trace_set_current_state(current->saved_state); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(¤t->pi_lock); \ @@ -327,6 +340,10 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); +/* wrapper function to trace from this header file */ +DECLARE_TRACEPOINT(sched_set_state_tp); +extern void __trace_set_current_state(int state_value); + /** * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode @@ -382,6 +399,11 @@ enum uclamp_id { #ifdef CONFIG_SMP extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; +extern void sched_domains_mutex_lock(void); +extern void sched_domains_mutex_unlock(void); +#else +static inline void sched_domains_mutex_lock(void) { } +static inline void sched_domains_mutex_unlock(void) { } #endif struct sched_param { @@ -398,6 +420,12 @@ struct sched_info { /* Time spent waiting on a runqueue: */ unsigned long long run_delay; + /* Max time spent waiting on a runqueue: */ + unsigned long long max_run_delay; + + /* Min time spent waiting on a runqueue: */ + unsigned long long min_run_delay; + /* Timestamps: */ /* When did we last run on a CPU? */ @@ -1211,6 +1239,10 @@ struct task_struct { struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + struct mutex *blocker_mutex; +#endif + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif @@ -1305,6 +1337,7 @@ struct task_struct { struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; + struct perf_ctx_data __rcu *perf_ctx_data; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; @@ -1375,6 +1408,15 @@ struct task_struct { * with respect to preemption. */ unsigned long rseq_event_mask; +# ifdef CONFIG_DEBUG_RSEQ + /* + * This is a place holder to save a copy of the rseq fields for + * validation of read-only fields. The struct rseq has a + * variable-length array at the end, so it cannot be used + * directly. Reserve a size large enough for the known fields. + */ + char rseq_fields[sizeof(struct rseq)]; +# endif #endif #ifdef CONFIG_SCHED_MM_CID @@ -1945,11 +1987,10 @@ static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); - -static inline void set_task_comm(struct task_struct *tsk, const char *from) -{ - __set_task_comm(tsk, from, false); -} +#define set_task_comm(tsk, from) ({ \ + BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \ + __set_task_comm(tsk, from, false); \ +}) /* * - Why not use task_lock()? 
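The set_task_comm() hunk just above converts the inline helper into a statement-expression macro so that BUILD_BUG_ON() can verify at compile time that the source buffer really is TASK_COMM_LEN bytes. A minimal sketch of the effect on callers, with illustrative buffer and task names (not from the patch itself):

	char buf[TASK_COMM_LEN];

	strscpy(buf, "my-worker", sizeof(buf));
	set_task_comm(tsk, buf);	/* ok: sizeof(buf) == TASK_COMM_LEN */

	const char *name = "my-worker";
	set_task_comm(tsk, name);	/* no longer builds: sizeof(name) is the pointer size */

In other words, callers that used to pass an arbitrary pointer now have to copy the name into a TASK_COMM_LEN-sized array first.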
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 3a912ab42bb5..f9aabbc9d22e 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -34,7 +34,11 @@ static inline bool dl_time_before(u64 a, u64 b) struct root_domain; extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); +extern void dl_clear_root_domain_cpu(int cpu); #endif /* CONFIG_SMP */ +extern u64 dl_cookie; +extern bool dl_bw_visited(int cpu, u64 cookie); + #endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index b5035afa2396..35ed4577a6cc 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -35,12 +35,10 @@ extern void show_stack(struct task_struct *task, unsigned long *sp, extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_SCHED_DEBUG struct seq_file; extern void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); -#endif /* Attach to any functions which should be ignored in wchan output. */ #define __sched __section(".sched.text") diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1d70a9867fb1..f7545430a548 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -146,6 +146,7 @@ struct sched_ext_entity { u32 weight; s32 sticky_cpu; s32 holding_cpu; + s32 selected_cpu; u32 kf_mask; /* see scx_kf_mask above */ struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ atomic_long_t ops_state; diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 412cdaba33eb..17e04859b9a4 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -18,10 +18,6 @@ extern int sched_cpu_dying(unsigned int cpu); # define sched_cpu_dying NULL #endif -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else static inline void idle_task_exit(void) {} -#endif #endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index e670ac282333..439f6029d3b9 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void) return unlikely(tif_need_resched()); } +static __always_inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb__after_atomic(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + #else static inline void __current_set_polling(void) { } static inline void __current_clr_polling(void) { } @@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void) { return unlikely(tif_need_resched()); } -#endif static __always_inline void current_clr_polling(void) { __current_clr_polling(); - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. - * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. 
- */ smp_mb(); /* paired with resched_curr() */ preempt_fold_need_resched(); } +#endif #endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 2b461129d1fa..d8501f4709b5 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,16 +7,21 @@ #include <linux/tick.h> enum hk_type { - HK_TYPE_TIMER, - HK_TYPE_RCU, - HK_TYPE_MISC, - HK_TYPE_SCHED, - HK_TYPE_TICK, HK_TYPE_DOMAIN, - HK_TYPE_WQ, HK_TYPE_MANAGED_IRQ, - HK_TYPE_KTHREAD, - HK_TYPE_MAX + HK_TYPE_KERNEL_NOISE, + HK_TYPE_MAX, + + /* + * The following housekeeping types are only set by the nohz_full + * boot commandline option. So they can share the same value. + */ + HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE, + HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE, + HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, + HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, + HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, + HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 928a626725e6..b13474825130 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -531,6 +531,13 @@ enum { static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { + /* + * The atomic_read() below prevents CSE. The following should + * help the compiler generate more efficient code on architectures + * where sync_core_before_usermode() is a no-op. + */ + if (!IS_ENABLED(CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE)) + return; if (current->mm != mm) return; if (likely(!(atomic_read(&mm->membarrier_state) & diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index d5d03d919df8..1ef1edbaaf79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -136,7 +136,8 @@ struct signal_struct { #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ - unsigned int next_posix_timer_id; + unsigned int timer_create_restore_ids:1; + atomic_t next_posix_timer_id; struct hlist_head posix_timers; struct hlist_head ignored_posix_timers; diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index fb1e295e7e63..166b19af956f 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -12,7 +12,7 @@ static __always_inline bool sched_smt_active(void) return static_branch_likely(&sched_smt_present); } #else -static inline bool sched_smt_active(void) { return false; } +static __always_inline bool sched_smt_active(void) { return false; } #endif void arch_smt_update(void); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4237daa5ac7a..7b4301b7235f 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -25,16 +25,12 @@ enum { }; #undef SD_FLAG -#ifdef CONFIG_SCHED_DEBUG - struct sd_flag_debug { unsigned int meta_flags; char *name; }; extern const struct sd_flag_debug sd_flag_debug[]; -#endif - #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { @@ -114,7 +110,10 @@ struct sched_domain { unsigned int lb_count[CPU_MAX_IDLE_TYPES]; unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES]; unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; unsigned int 
lb_nobusyg[CPU_MAX_IDLE_TYPES]; @@ -140,9 +139,7 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif -#ifdef CONFIG_SCHED_DEBUG char *name; -#endif union { void *private; /* used during construction */ struct rcu_head rcu; /* used during destruction */ @@ -165,10 +162,6 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains_locked(int ndoms_new, - cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new); @@ -198,30 +191,18 @@ struct sched_domain_topology_level { int flags; int numa_level; struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG char *name; -#endif }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); -#ifdef CONFIG_SCHED_DEBUG # define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 06cd8fb2f409..0f28b4623ad4 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -63,4 +63,38 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); +/* Spin unlock helpers to unlock and call wake_up_q with preempt disabled */ +static inline +void raw_spin_unlock_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock(lock); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} + +static inline +void raw_spin_unlock_irq_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock_irq(lock); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} + +static inline +void raw_spin_unlock_irqrestore_wake(raw_spinlock_t *lock, unsigned long flags, + struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock_irqrestore(lock, flags); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 066216f1357a..53b356a26414 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -13,10 +13,11 @@ #include <linux/notifier.h> #include <linux/types.h> -enum scmi_nxp_protocol { - SCMI_PROTOCOL_IMX_BBM = 0x81, - SCMI_PROTOCOL_IMX_MISC = 0x84, -}; +#define SCMI_PROTOCOL_IMX_BBM 0x81 +#define SCMI_PROTOCOL_IMX_MISC 0x84 + +#define SCMI_IMX_VENDOR "NXP" +#define SCMI_IMX_SUBVENDOR "IMX" struct scmi_imx_bbm_proto_ops { int (*rtc_time_set)(const struct scmi_protocol_handle *ph, u32 id, diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 836a7e200f39..6719949135c9 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -222,7 +222,6 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - /* __u8 payload[]; */ }; struct sctp_data_chunk { @@ -239,7 +238,6 @@ struct sctp_idatahdr { __u32 ppid; __be32 fsn; }; - __u8 payload[0]; }; struct sctp_idata_chunk { diff --git 
a/include/linux/seccomp.h b/include/linux/seccomp.h index e45531455d3b..9b959972bf4a 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -22,21 +22,17 @@ #include <linux/atomic.h> #include <asm/seccomp.h> +extern int __secure_computing(void); + #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER -extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { if (unlikely(test_syscall_work(SECCOMP))) - return __secure_computing(NULL); + return __secure_computing(); return 0; } #else extern void secure_computing_strict(int this_syscall); -static inline int __secure_computing(const struct seccomp_data *sd) -{ - secure_computing_strict(sd->nr); - return 0; -} #endif extern long prctl_get_seccomp(void); @@ -58,7 +54,7 @@ static inline int secure_computing(void) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif -static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } +static inline int __secure_computing(void) { return 0; } static inline long prctl_get_seccomp(void) { diff --git a/include/linux/security.h b/include/linux/security.h index cbdba435b798..cc9b54d95d22 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -226,6 +226,18 @@ extern unsigned long dac_mmap_min_addr; #endif /* + * A "security context" is the text representation of + * the information used by LSMs. + * This structure contains the string, its length, and which LSM + * it is useful for. + */ +struct lsm_context { + char *context; /* Provided by the module */ + u32 len; + int id; /* Identifies the module */ +}; + +/* * Values used in the task_security_ops calls */ /* setuid or setgid, id0 == uid or gid */ @@ -378,8 +390,8 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen); + const char **xattr_name, + struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -553,14 +565,14 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name, int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); -int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); -int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); +int security_secid_to_secctx(u32 secid, struct lsm_context *cp); +int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); -void security_release_secctx(char *secdata, u32 seclen); +void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); -int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); +int security_inode_getsecctx(struct inode *inode, struct lsm_context *cp); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); @@ -852,8 +864,7 @@ 
static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, - u32 *ctxlen) + struct lsm_context *lsmcxt) { return -EOPNOTSUPP; } @@ -1526,14 +1537,13 @@ static inline int security_ismaclabel(const char *name) return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, - u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp) { return -EOPNOTSUPP; } static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) { return -EOPNOTSUPP; } @@ -1545,7 +1555,7 @@ static inline int security_secctx_to_secid(const char *secdata, return -EOPNOTSUPP; } -static inline void security_release_secctx(char *secdata, u32 seclen) +static inline void security_release_secctx(struct lsm_context *cp) { } @@ -1561,7 +1571,8 @@ static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 { return -EOPNOTSUPP; } -static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +static inline int security_inode_getsecctx(struct inode *inode, + struct lsm_context *cp) { return -EOPNOTSUPP; } @@ -2238,14 +2249,14 @@ struct bpf_map; struct bpf_prog; struct bpf_token; #ifdef CONFIG_SECURITY -extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); +extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size, bool kernel); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token); + struct bpf_token *token, bool kernel); extern void security_bpf_map_free(struct bpf_map *map); extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token); + struct bpf_token *token, bool kernel); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, const struct path *path); @@ -2254,7 +2265,7 @@ extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cm extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, - unsigned int size) + unsigned int size, bool kernel) { return 0; } @@ -2270,7 +2281,7 @@ static inline int security_bpf_prog(struct bpf_prog *prog) } static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) { return 0; } @@ -2279,7 +2290,7 @@ static inline void security_bpf_map_free(struct bpf_map *map) { } static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) { return 0; } @@ -2313,14 +2324,13 @@ struct perf_event_attr; struct perf_event; #ifdef CONFIG_SECURITY -extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_open(int type); extern int security_perf_event_alloc(struct perf_event *event); extern void security_perf_event_free(struct perf_event *event); extern int security_perf_event_read(struct perf_event *event); extern int security_perf_event_write(struct perf_event *event); #else -static inline int security_perf_event_open(struct perf_event_attr *attr, - int type) +static inline int 
security_perf_event_open(int type) { return 0; } @@ -2351,6 +2361,7 @@ static inline int security_perf_event_write(struct perf_event *event) extern int security_uring_override_creds(const struct cred *new); extern int security_uring_sqpoll(void); extern int security_uring_cmd(struct io_uring_cmd *ioucmd); +extern int security_uring_allowed(void); #else static inline int security_uring_override_creds(const struct cred *new) { @@ -2364,6 +2375,10 @@ static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) { return 0; } +static inline int security_uring_allowed(void) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fe41da005970..52791e070506 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -167,8 +167,8 @@ extern int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, const void *buf, size_t len, bool ascii); #ifdef CONFIG_BINARY_PRINTF -extern int -seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); +__printf(2, 0) +int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); #endif void seq_buf_do_printk(struct seq_buf *s, const char *lvl); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 2fb266ea69fa..d6ebf0596510 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -181,6 +181,7 @@ int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); #ifdef CONFIG_BINARY_PRINTF +__printf(2, 0) void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary); #endif diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5298765d6ca4..5ce48eab7a2a 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) ({ \ unsigned __seq; \ \ - while ((__seq = seqprop_sequence(s)) & 1) \ + while (unlikely((__seq = seqprop_sequence(s)) & 1)) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -319,6 +319,29 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) }) /** + * raw_seqcount_try_begin() - begin a seqcount_t read critical section + * w/o lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * @start: count to be passed to read_seqcount_retry() + * + * Similar to raw_seqcount_begin(), except it enables eliding the critical + * section entirely if odd, instead of doing the speculation knowing it will + * fail. + * + * Useful when counter stabilization is more or less equivalent to taking + * the lock and there is a slowpath that does that. + * + * If true, start will be set to the (even) sequence count read. + * + * Return: true when a read critical section is started. 
+ */ +#define raw_seqcount_try_begin(s, start) \ +({ \ + start = raw_read_seqcount(s); \ + !(start & 1); \ +}) + +/** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants diff --git a/include/linux/serdev.h b/include/linux/serdev.h index ff78efc1f60d..34562eb99931 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -84,7 +84,6 @@ enum serdev_parity { struct serdev_controller_ops { ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t); void (*write_flush)(struct serdev_controller *); - int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); @@ -212,7 +211,6 @@ int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long); void serdev_device_write_flush(struct serdev_device *); -int serdev_device_write_room(struct serdev_device *); /* * serdev device driver functions @@ -273,10 +271,6 @@ static inline ssize_t serdev_device_write(struct serdev_device *sdev, return -ENODEV; } static inline void serdev_device_write_flush(struct serdev_device *sdev) {} -static inline int serdev_device_write_room(struct serdev_device *sdev) -{ - return 0; -} #define serdev_device_driver_register(x) #define serdev_device_driver_unregister(x) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index e0717c8393d7..144de7a7948d 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -161,8 +161,8 @@ struct uart_8250_port { void (*dl_write)(struct uart_8250_port *up, u32 value); struct uart_8250_em485 *em485; - void (*rs485_start_tx)(struct uart_8250_port *); - void (*rs485_stop_tx)(struct uart_8250_port *); + void (*rs485_start_tx)(struct uart_8250_port *up, bool toggle_ier); + void (*rs485_stop_tx)(struct uart_8250_port *up, bool toggle_ier); /* Serial port overrun backoff */ struct delayed_work overrun_backoff; diff --git a/include/linux/sizes.h b/include/linux/sizes.h index c3a00b967d18..49039494076f 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -23,17 +23,25 @@ #define SZ_4K 0x00001000 #define SZ_8K 0x00002000 #define SZ_16K 0x00004000 +#define SZ_24K 0x00006000 #define SZ_32K 0x00008000 #define SZ_64K 0x00010000 #define SZ_128K 0x00020000 +#define SZ_192K 0x00030000 #define SZ_256K 0x00040000 +#define SZ_384K 0x00060000 #define SZ_512K 0x00080000 #define SZ_1M 0x00100000 #define SZ_2M 0x00200000 +#define SZ_3M 0x00300000 #define SZ_4M 0x00400000 +#define SZ_6M 0x00600000 #define SZ_8M 0x00800000 +#define SZ_12M 0x00c00000 #define SZ_16M 0x01000000 +#define SZ_18M 0x01200000 +#define SZ_24M 0x01800000 #define SZ_32M 0x02000000 #define SZ_64M 0x04000000 #define SZ_128M 0x08000000 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 58009fa66102..b974a277975a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -470,7 +470,7 @@ struct skb_shared_hwtstamps { /* Definitions for tx_flags in struct skb_shared_info */ enum { /* generate hardware time stamp */ - SKBTX_HW_TSTAMP = 1 << 0, + SKBTX_HW_TSTAMP_NOBPF = 1 << 0, /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, @@ -478,8 +478,8 @@ enum { /* device driver is going to provide hardware time stamp */ 
SKBTX_IN_PROGRESS = 1 << 2, - /* generate hardware time stamp based on cycles if supported */ - SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3, + /* generate software time stamp on packet tx completion */ + SKBTX_COMPLETION_TSTAMP = 1 << 3, /* generate wifi status information (where possible) */ SKBTX_WIFI_STATUS = 1 << 4, @@ -489,12 +489,18 @@ enum { /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, + + /* used for bpf extension when a bpf program is loaded */ + SKBTX_BPF = 1 << 7, }; +#define SKBTX_HW_TSTAMP (SKBTX_HW_TSTAMP_NOBPF | SKBTX_BPF) + #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP) + SKBTX_SCHED_TSTAMP | \ + SKBTX_BPF | \ + SKBTX_COMPLETION_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ - SKBTX_HW_TSTAMP_USE_CYCLES | \ SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ @@ -608,11 +614,19 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; - unsigned int xdp_frags_size; - /* Intermediate layers must ensure that destructor_arg - * remains valid until skb destructor */ - void * destructor_arg; + union { + struct { + u32 xdp_frags_size; + u32 xdp_frags_truesize; + }; + + /* + * Intermediate layers must ensure that destructor_arg + * remains valid until skb destructor. + */ + void *destructor_arg; + }; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; @@ -695,6 +709,8 @@ enum { SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, + + SKB_GSO_TCP_ACCECN = 1 << 19, }; #if BITS_PER_LONG > 32 @@ -1134,7 +1150,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) * skb_dst - returns skb dst_entry * @skb: buffer * - * Returns skb dst_entry, regardless of reference taken or not. + * Returns: skb dst_entry, regardless of reference taken or not. */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { @@ -1222,7 +1238,7 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) * skb_unref - decrement the skb's reference count * @skb: buffer * - * Returns true if we can free the skb. + * Returns: true if we can free the skb. */ static inline bool skb_unref(struct sk_buff *skb) { @@ -1307,6 +1323,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); void skb_attempt_defer_free(struct sk_buff *skb); +u32 napi_skb_cache_get_bulk(void **skbs, u32 n); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); struct sk_buff *slab_build_skb(void *data); @@ -1344,7 +1361,7 @@ struct sk_buff_fclones { * @sk: socket * @skb: buffer * - * Returns true if skb is a fast clone, and its clone is not freed. + * Returns: true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. 
*/ @@ -1519,14 +1536,8 @@ void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - const void *data, int hlen_proto); - -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, - int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} +__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + const void *data, int hlen_proto); void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, @@ -3516,7 +3527,7 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * - * Returns false if this page should be returned to page allocator, true + * Returns: false if this page should be returned to page allocator, true * otherwise. */ static inline bool dev_page_is_reusable(const struct page *page) @@ -3627,13 +3638,13 @@ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, - struct bpf_prog *prog); + const struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. The page must already + * Returns: the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) @@ -3648,7 +3659,7 @@ static inline void *skb_frag_address(const skb_frag_t *frag) * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. Checks that the page + * Returns: the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. */ static inline void *skb_frag_address_safe(const skb_frag_t *frag) @@ -3674,7 +3685,7 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto, bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** - * skb_frag_dma_map - maps a paged fragment via the DMA API + * __skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to * @frag: the paged fragment to map * @offset: the offset within the fragment (starting at the @@ -3684,15 +3695,36 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); * * Maps the page associated with @frag to @device. */ -static inline dma_addr_t skb_frag_dma_map(struct device *dev, - const skb_frag_t *frag, - size_t offset, size_t size, - enum dma_data_direction dir) +static inline dma_addr_t __skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } +#define skb_frag_dma_map(dev, frag, ...) 
\ + CONCATENATE(_skb_frag_dma_map, \ + COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__) + +#define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({ \ + const skb_frag_t *uf = (frag); \ + size_t uo = (offset); \ + \ + __skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo, \ + DMA_TO_DEVICE); \ +}) +#define _skb_frag_dma_map1(dev, frag, offset) \ + __skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_), \ + __UNIQUE_ID(offset_)) +#define _skb_frag_dma_map0(dev, frag) \ + _skb_frag_dma_map1(dev, frag, 0) +#define _skb_frag_dma_map2(dev, frag, offset, size) \ + __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) +#define _skb_frag_dma_map3(dev, frag, offset, size, dir) \ + __skb_frag_dma_map(dev, frag, offset, size, dir) + static inline struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { @@ -3836,25 +3868,6 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; -static inline int skb_add_data(struct sk_buff *skb, - struct iov_iter *from, int copy) -{ - const int off = skb->len; - - if (skb->ip_summed == CHECKSUM_NONE) { - __wsum csum = 0; - if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, - &csum, from)) { - skb->csum = csum_block_add(skb->csum, csum, off); - return 0; - } - } else if (copy_from_iter_full(skb_put(skb, copy), copy, from)) - return 0; - - __skb_trim(skb, off); - return -EFAULT; -} - static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { @@ -3890,7 +3903,7 @@ static inline int skb_linearize(struct sk_buff *skb) * skb_has_shared_frag - can any frag be overwritten * @skb: buffer to test * - * Return true if the skb has at least one frag that might be modified + * Return: true if the skb has at least one frag that might be modified * by an external entity (as in vmsplice()/sendfile()) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) @@ -4535,7 +4548,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); - if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) + if (skb_shinfo(skb)->tx_flags & (SKBTX_SW_TSTAMP | SKBTX_BPF)) skb_tstamp_tx(skb, NULL); } @@ -4612,7 +4625,7 @@ static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) /* Check if we need to perform checksum complete validation. * - * Returns true if checksum complete is needed, false otherwise + * Returns: true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). 
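The skb_frag_dma_map() rework above turns the helper into a variadic macro: COUNT_ARGS()/CONCATENATE() select an expansion based on how many optional arguments the caller supplies, with the omitted ones defaulting to offset 0, the remaining fragment length, and DMA_TO_DEVICE. A sketch of the call forms this enables, using illustrative driver-side variables:

	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
	dma_addr_t dma;

	/* Whole fragment, offset 0, DMA_TO_DEVICE. */
	dma = skb_frag_dma_map(dev, frag);

	/* From an offset to the end of the fragment, DMA_TO_DEVICE. */
	dma = skb_frag_dma_map(dev, frag, offset);

	/* Explicit offset and length, DMA_TO_DEVICE. */
	dma = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag));

	/* Fully spelled out, including the DMA direction. */
	dma = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE);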
*/ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2cbe0c22a32f..0b9095a281b8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -91,6 +91,8 @@ struct sk_psock { struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; + u32 copied_seq; + u32 ingress_bytes; #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; diff --git a/include/linux/slab.h b/include/linux/slab.h index 10a971c2bde3..d5a8ab98035c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/overflow.h> #include <linux/types.h> +#include <linux/rcupdate.h> #include <linux/workqueue.h> #include <linux/percpu-refcount.h> #include <linux/cleanup.h> @@ -136,6 +137,15 @@ enum _slab_flag_bits { * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that object identity check has to be done *after* acquiring a + * reference, therefore user has to ensure proper ordering for loads. + * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU, + * the newly allocated object has to be fully initialized *before* its + * refcount gets initialized and proper ordering for stores is required. + * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are + * designed with the proper fences required for reference counting objects + * allocated with SLAB_TYPESAFE_BY_RCU. + * * Note that it is not possible to acquire a lock within a structure * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages @@ -235,12 +245,6 @@ enum _slab_flag_bits { #endif /* - * freeptr_t represents a SLUB freelist pointer, which might be encoded - * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. - */ -typedef struct { unsigned long v; } freeptr_t; - -/* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. @@ -941,8 +945,6 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc_noprof(bytes, flags); return kmalloc_noprof(bytes, flags); } #define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) @@ -1082,6 +1084,19 @@ extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); +#ifndef CONFIG_KVFREE_RCU_BATCHED +static inline void kvfree_rcu_barrier(void) +{ + rcu_barrier(); +} + +static inline void kfree_rcu_scheduler_running(void) { } +#else +void kvfree_rcu_barrier(void); + +void kfree_rcu_scheduler_running(void); +#endif + /** * kmalloc_size_roundup - Report allocation bucket size for the given size * @@ -1099,5 +1114,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s); size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); +void __init kvfree_rcu_init(void); #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index c06d17599ae7..736f53018017 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -56,7 +56,7 @@ struct apple_rtkit_shmem { * context. 
*/ struct apple_rtkit_ops { - void (*crashed)(void *cookie); + void (*crashed)(void *cookie, const void *crashlog, size_t crashlog_size); void (*recv_message)(void *cookie, u8 endpoint, u64 message); bool (*recv_message_early)(void *cookie, u8 endpoint, u64 message); int (*shmem_setup)(void *cookie, struct apple_rtkit_shmem *bfr); diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 5bee6f7fc400..0c3906e8ad19 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -391,14 +391,6 @@ int cmdq_pkt_jump_rel(struct cmdq_pkt *pkt, s32 offset, u8 shift_pa); */ int cmdq_pkt_eoc(struct cmdq_pkt *pkt); -/** - * cmdq_pkt_finalize() - Append EOC and jump command to pkt. - * @pkt: the CMDQ packet - * - * Return: 0 for success; else the error code is returned - */ -int cmdq_pkt_finalize(struct cmdq_pkt *pkt); - #else /* IS_ENABLED(CONFIG_MTK_CMDQ) */ static inline int cmdq_dev_get_client_reg(struct device *dev, @@ -519,11 +511,6 @@ static inline int cmdq_pkt_eoc(struct cmdq_pkt *pkt) return -EINVAL; } -static inline int cmdq_pkt_finalize(struct cmdq_pkt *pkt) -{ - return -EINVAL; -} - #endif /* IS_ENABLED(CONFIG_MTK_CMDQ) */ #endif /* __MTK_CMDQ_H__ */ diff --git a/include/linux/sort.h b/include/linux/sort.h index e163287ac6c1..8e5603b10941 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -13,4 +13,15 @@ void sort(void *base, size_t num, size_t size, cmp_func_t cmp_func, swap_func_t swap_func); +/* Versions that periodically call cond_resched(): */ + +void sort_r_nonatomic(void *base, size_t num, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv); + +void sort_nonatomic(void *base, size_t num, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func); + #endif diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index bd9836690da6..2362f621d94c 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -54,6 +54,8 @@ struct sdw_slave; #define SDW_MAX_PORTS 15 #define SDW_VALID_PORT_RANGE(n) ((n) < SDW_MAX_PORTS && (n) >= 1) +#define SDW_MAX_LANES 8 + enum { SDW_PORT_DIRN_SINK = 0, SDW_PORT_DIRN_SOURCE, @@ -148,12 +150,14 @@ enum sdw_dpn_pkg_mode { * * @SDW_STREAM_PCM: PCM data stream * @SDW_STREAM_PDM: PDM data stream + * @SDW_STREAM_BPT: BPT data stream * * spec doesn't define this, but is used in implementation */ enum sdw_stream_type { SDW_STREAM_PCM = 0, SDW_STREAM_PDM = 1, + SDW_STREAM_BPT = 2, }; /** @@ -356,6 +360,7 @@ struct sdw_dpn_prop { * and masks are supported * @commit_register_supported: is PCP_Commit register supported * @scp_int1_mask: SCP_INT1_MASK desired settings + * @lane_maps: Lane mapping for the slave, only valid if lane_control_support is set * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. 
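The sort.h hunk above adds sort_nonatomic() and sort_r_nonatomic(), which behave like sort()/sort_r() but periodically call cond_resched() so that very large sorts do not monopolize the CPU. A minimal usage sketch based only on the prototypes shown in that hunk; the sample array, comparison callback and calling context are illustrative and not part of the patch:

/* Example sketch only -- not part of the diff above. */
#include <linux/sort.h>
#include <linux/types.h>

static int cmp_u32(const void *a, const void *b)
{
	u32 x = *(const u32 *)a;
	u32 y = *(const u32 *)b;

	return x < y ? -1 : x > y;
}

/* Call only from a context that may sleep, since cond_resched() is used. */
static void sort_samples(u32 *samples, size_t count)
{
	/* A NULL swap_func selects the generic built-in swap. */
	sort_nonatomic(samples, count, sizeof(*samples), cmp_u32, NULL);
}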
@@ -385,6 +390,7 @@ struct sdw_slave_prop { u32 sdca_interrupt_register_list; u8 commit_register_supported; u8 scp_int1_mask; + u8 lane_maps[SDW_MAX_LANES]; bool clock_reg_supported; bool use_domain_irq; }; @@ -450,6 +456,7 @@ struct sdw_master_prop { int sdw_master_read_prop(struct sdw_bus *bus); int sdw_slave_read_prop(struct sdw_slave *slave); +int sdw_slave_read_lane_mapping(struct sdw_slave *slave); /* * SDW Slave Structures and APIs @@ -817,6 +824,15 @@ struct sdw_defer { struct completion complete; }; +/* + * Add a practical limit to BPT transfer sizes. BPT is typically used + * to transfer firmware, and larger firmware transfers will increase + * the cold latency beyond typical OS or user requirements. + */ +#define SDW_BPT_MSG_MAX_BYTES (1024 * 1024) + +struct sdw_bpt_msg; + /** * struct sdw_master_ops - Master driver ops * @read_prop: Read Master properties @@ -832,6 +848,10 @@ struct sdw_defer { * @get_device_num: Callback for vendor-specific device_number allocation * @put_device_num: Callback for vendor-specific device_number release * @new_peripheral_assigned: Callback to handle enumeration of new peripheral. + * @bpt_send_async: reserve resources for BPT stream and send message + * using BTP protocol + * @bpt_wait: wait for message completion using BTP protocol + * and release resources */ struct sdw_master_ops { int (*read_prop)(struct sdw_bus *bus); @@ -848,77 +868,9 @@ struct sdw_master_ops { void (*new_peripheral_assigned)(struct sdw_bus *bus, struct sdw_slave *slave, int dev_num); -}; - -/** - * struct sdw_bus - SoundWire bus - * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. - * @md: Master device - * @bus_lock_key: bus lock key associated to @bus_lock - * @bus_lock: bus lock - * @slaves: list of Slaves on this bus - * @msg_lock_key: message lock key associated to @msg_lock - * @msg_lock: message lock - * @m_rt_list: List of Master instance of all stream(s) running on Bus. This - * is used to compute and program bus bandwidth, clock, frame shape, - * transport and port parameters - * @defer_msg: Defer message - * @params: Current bus parameters - * @stream_refcount: number of streams currently using this bus - * @ops: Master callback ops - * @port_ops: Master port callback ops - * @prop: Master properties - * @vendor_specific_prop: pointer to non-standard properties - * @hw_sync_min_links: Number of links used by a stream above which - * hardware-based synchronization is required. This value is only - * meaningful if multi_link is set. If set to 1, hardware-based - * synchronization will be used even if a stream only uses a single - * SoundWire segment. - * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. - * @link_id: Link id number, can be 0 to N, unique for each Controller - * @id: bus system-wide unique id - * @compute_params: points to Bus resource management implementation - * @assigned: Bitmap for Slave device numbers. - * Bit set implies used number, bit clear implies unused number. - * @clk_stop_timeout: Clock stop timeout computed - * @bank_switch_timeout: Bank switch timeout computed - * @domain: IRQ domain - * @irq_chip: IRQ chip - * @debugfs: Bus debugfs (optional) - * @multi_link: Store bus property that indicates if multi links - * are supported. This flag is populated by drivers after reading - * appropriate firmware (ACPI/DT). 
- */ -struct sdw_bus { - struct device *dev; - struct sdw_master_device *md; - struct lock_class_key bus_lock_key; - struct mutex bus_lock; - struct list_head slaves; - struct lock_class_key msg_lock_key; - struct mutex msg_lock; - struct list_head m_rt_list; - struct sdw_defer defer_msg; - struct sdw_bus_params params; - int stream_refcount; - const struct sdw_master_ops *ops; - const struct sdw_master_port_ops *port_ops; - struct sdw_master_prop prop; - void *vendor_specific_prop; - int hw_sync_min_links; - int controller_id; - unsigned int link_id; - int id; - int (*compute_params)(struct sdw_bus *bus); - DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); - unsigned int clk_stop_timeout; - u32 bank_switch_timeout; - struct irq_chip irq_chip; - struct irq_domain *domain; -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs; -#endif - bool multi_link; + int (*bpt_send_async)(struct sdw_bus *bus, struct sdw_slave *slave, + struct sdw_bpt_msg *msg); + int (*bpt_wait)(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, @@ -945,7 +897,7 @@ struct sdw_port_config { * @ch_count: Channel count of the stream * @bps: Number of bits per audio sample * @direction: Data direction - * @type: Stream type PCM or PDM + * @type: Stream type PCM, PDM or BPT */ struct sdw_stream_config { unsigned int frame_rate; @@ -995,7 +947,7 @@ struct sdw_stream_params { * @name: SoundWire stream name * @params: Stream parameters * @state: Current state of the stream - * @type: Stream type PCM or PDM + * @type: Stream type PCM, PDM or BPT * @m_rt_count: Count of Master runtime(s) in this stream * @master_list: List of Master runtime(s) in this stream. * master_list can contain only one m_rt per Master instance @@ -1010,10 +962,88 @@ struct sdw_stream_runtime { struct list_head master_list; }; -struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); +/** + * struct sdw_bus - SoundWire bus + * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. + * @md: Master device + * @bus_lock_key: bus lock key associated to @bus_lock + * @bus_lock: bus lock + * @slaves: list of Slaves on this bus + * @msg_lock_key: message lock key associated to @msg_lock + * @msg_lock: message lock + * @m_rt_list: List of Master instance of all stream(s) running on Bus. This + * is used to compute and program bus bandwidth, clock, frame shape, + * transport and port parameters + * @defer_msg: Defer message + * @params: Current bus parameters + * @stream_refcount: number of streams currently using this bus + * @btp_stream_refcount: number of BTP streams currently using this bus (should + * be zero or one, multiple streams per link is not supported). + * @bpt_stream: pointer stored to handle BTP streams. + * @ops: Master callback ops + * @port_ops: Master port callback ops + * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties + * @hw_sync_min_links: Number of links used by a stream above which + * hardware-based synchronization is required. This value is only + * meaningful if multi_link is set. If set to 1, hardware-based + * synchronization will be used even if a stream only uses a single + * SoundWire segment. + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. 
+ * @link_id: Link id number, can be 0 to N, unique for each Controller + * @id: bus system-wide unique id + * @compute_params: points to Bus resource management implementation + * @assigned: Bitmap for Slave device numbers. + * Bit set implies used number, bit clear implies unused number. + * @clk_stop_timeout: Clock stop timeout computed + * @bank_switch_timeout: Bank switch timeout computed + * @domain: IRQ domain + * @irq_chip: IRQ chip + * @debugfs: Bus debugfs (optional) + * @multi_link: Store bus property that indicates if multi links + * are supported. This flag is populated by drivers after reading + * appropriate firmware (ACPI/DT). + * @lane_used_bandwidth: how much bandwidth in bits per second is used by each lane + */ +struct sdw_bus { + struct device *dev; + struct sdw_master_device *md; + struct lock_class_key bus_lock_key; + struct mutex bus_lock; + struct list_head slaves; + struct lock_class_key msg_lock_key; + struct mutex msg_lock; + struct list_head m_rt_list; + struct sdw_defer defer_msg; + struct sdw_bus_params params; + int stream_refcount; + int bpt_stream_refcount; + struct sdw_stream_runtime *bpt_stream; + const struct sdw_master_ops *ops; + const struct sdw_master_port_ops *port_ops; + struct sdw_master_prop prop; + void *vendor_specific_prop; + int hw_sync_min_links; + int controller_id; + unsigned int link_id; + int id; + int (*compute_params)(struct sdw_bus *bus, struct sdw_stream_runtime *stream); + DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); + unsigned int clk_stop_timeout; + u32 bank_switch_timeout; + struct irq_chip irq_chip; + struct irq_domain *domain; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; +#endif + bool multi_link; + unsigned int lane_used_bandwidth[SDW_MAX_LANES]; +}; + +struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name, enum sdw_stream_type type); void sdw_release_stream(struct sdw_stream_runtime *stream); -int sdw_compute_params(struct sdw_bus *bus); +int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream); int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, @@ -1034,6 +1064,11 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus); int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +bool is_clock_scaling_supported_by_slave(struct sdw_slave *slave); + +int sdw_bpt_send_async(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); +int sdw_bpt_wait(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); +int sdw_bpt_send_sync(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); #if IS_ENABLED(CONFIG_SOUNDWIRE) @@ -1045,6 +1080,8 @@ int sdw_stream_add_slave(struct sdw_slave *slave, int sdw_stream_remove_slave(struct sdw_slave *slave, struct sdw_stream_runtime *stream); +int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base); + /* messaging and data APIs */ int sdw_read(struct sdw_slave *slave, u32 addr); int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 799f8578137b..6b839987f14c 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -28,6 +28,8 @@ #define ACP_SDW1 1 #define AMD_SDW_MAX_MANAGER_COUNT 2 #define ACP63_PCI_REV_ID 0x63 +#define ACP70_PCI_REV_ID 0x70 +#define ACP71_PCI_REV_ID 0x71 struct acp_sdw_pdata { u16 instance; diff --git a/include/linux/soundwire/sdw_intel.h 
b/include/linux/soundwire/sdw_intel.h index 580086417e4b..dc6ebaee3d18 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -365,7 +365,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); @@ -436,6 +436,10 @@ struct sdw_intel_hw_ops { bool (*sync_check_cmdsync_unlocked)(struct sdw_intel *sdw); void (*program_sdi)(struct sdw_intel *sdw, int dev_num); + + int (*bpt_send_async)(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg); + int (*bpt_wait)(struct sdw_intel *sdw, struct sdw_slave *slave, struct sdw_bpt_msg *msg); }; extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index 658b10fa5b20..0a5939285583 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -4,6 +4,9 @@ #ifndef __SDW_REGISTERS_H #define __SDW_REGISTERS_H +#include <linux/bitfield.h> +#include <linux/bits.h> + /* * SDW registers as defined by MIPI 1.2 Spec */ @@ -329,16 +332,27 @@ * 2:0 Control Number[2:0] */ -#define SDW_SDCA_CTL(fun, ent, ctl, ch) (BIT(30) | \ - (((fun) & 0x7) << 22) | \ - (((ent) & 0x40) << 15) | \ - (((ent) & 0x3f) << 7) | \ - (((ctl) & 0x30) << 15) | \ - (((ctl) & 0x0f) << 3) | \ - (((ch) & 0x38) << 12) | \ - ((ch) & 0x07)) +#define SDW_SDCA_CTL(fun, ent, ctl, ch) (BIT(30) | \ + (((fun) & GENMASK(2, 0)) << 22) | \ + (((ent) & BIT(6)) << 15) | \ + (((ent) & GENMASK(5, 0)) << 7) | \ + (((ctl) & GENMASK(5, 4)) << 15) | \ + (((ctl) & GENMASK(3, 0)) << 3) | \ + (((ch) & GENMASK(5, 3)) << 12) | \ + ((ch) & GENMASK(2, 0))) + +#define SDW_SDCA_CTL_FUNC(reg) FIELD_GET(GENMASK(24, 22), (reg)) +#define SDW_SDCA_CTL_ENT(reg) ((FIELD_GET(BIT(21), (reg)) << 6) | \ + FIELD_GET(GENMASK(12, 7), (reg))) +#define SDW_SDCA_CTL_CSEL(reg) ((FIELD_GET(GENMASK(20, 19), (reg)) << 4) | \ + FIELD_GET(GENMASK(6, 3), (reg))) +#define SDW_SDCA_CTL_CNUM(reg) ((FIELD_GET(GENMASK(17, 15), (reg)) << 3) | \ + FIELD_GET(GENMASK(2, 0), (reg))) #define SDW_SDCA_MBQ_CTL(reg) ((reg) | BIT(13)) #define SDW_SDCA_NEXT_CTL(reg) ((reg) | BIT(14)) +/* Check the reserved and fixed bits in address */ +#define SDW_SDCA_VALID_CTL(reg) (((reg) & (GENMASK(31, 25) | BIT(18) | BIT(13))) == BIT(30)) + #endif /* __SDW_REGISTERS_H */ diff --git a/include/linux/spi/offload/consumer.h b/include/linux/spi/offload/consumer.h new file mode 100644 index 000000000000..cd7d5daa21e6 --- /dev/null +++ b/include/linux/spi/offload/consumer.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Analog Devices Inc. 
+ * Copyright (C) 2024 BayLibre, SAS + */ + +#ifndef __LINUX_SPI_OFFLOAD_CONSUMER_H +#define __LINUX_SPI_OFFLOAD_CONSUMER_H + +#include <linux/module.h> +#include <linux/spi/offload/types.h> +#include <linux/types.h> + +MODULE_IMPORT_NS("SPI_OFFLOAD"); + +struct device; +struct spi_device; + +struct spi_offload *devm_spi_offload_get(struct device *dev, struct spi_device *spi, + const struct spi_offload_config *config); + +struct spi_offload_trigger +*devm_spi_offload_trigger_get(struct device *dev, + struct spi_offload *offload, + enum spi_offload_trigger_type type); +int spi_offload_trigger_validate(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); +int spi_offload_trigger_enable(struct spi_offload *offload, + struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); +void spi_offload_trigger_disable(struct spi_offload *offload, + struct spi_offload_trigger *trigger); + +struct dma_chan *devm_spi_offload_tx_stream_request_dma_chan(struct device *dev, + struct spi_offload *offload); +struct dma_chan *devm_spi_offload_rx_stream_request_dma_chan(struct device *dev, + struct spi_offload *offload); + +#endif /* __LINUX_SPI_OFFLOAD_CONSUMER_H */ diff --git a/include/linux/spi/offload/provider.h b/include/linux/spi/offload/provider.h new file mode 100644 index 000000000000..76c7cf651092 --- /dev/null +++ b/include/linux/spi/offload/provider.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Analog Devices Inc. + * Copyright (C) 2024 BayLibre, SAS + */ + +#ifndef __LINUX_SPI_OFFLOAD_PROVIDER_H +#define __LINUX_SPI_OFFLOAD_PROVIDER_H + +#include <linux/module.h> +#include <linux/spi/offload/types.h> +#include <linux/types.h> + +MODULE_IMPORT_NS("SPI_OFFLOAD"); + +struct device; +struct spi_offload_trigger; + +struct spi_offload *devm_spi_offload_alloc(struct device *dev, size_t priv_size); + +struct spi_offload_trigger_ops { + bool (*match)(struct spi_offload_trigger *trigger, + enum spi_offload_trigger_type type, u64 *args, u32 nargs); + int (*request)(struct spi_offload_trigger *trigger, + enum spi_offload_trigger_type type, u64 *args, u32 nargs); + void (*release)(struct spi_offload_trigger *trigger); + int (*validate)(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); + int (*enable)(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); + void (*disable)(struct spi_offload_trigger *trigger); +}; + +struct spi_offload_trigger_info { + /** @fwnode: Provider fwnode, used to match to consumer. */ + struct fwnode_handle *fwnode; + /** @ops: Provider-specific callbacks. */ + const struct spi_offload_trigger_ops *ops; + /** Provider-specific state to be used in callbacks. */ + void *priv; +}; + +int devm_spi_offload_trigger_register(struct device *dev, + struct spi_offload_trigger_info *info); +void *spi_offload_trigger_get_priv(struct spi_offload_trigger *trigger); + +#endif /* __LINUX_SPI_OFFLOAD_PROVIDER_H */ diff --git a/include/linux/spi/offload/types.h b/include/linux/spi/offload/types.h new file mode 100644 index 000000000000..6f7892347871 --- /dev/null +++ b/include/linux/spi/offload/types.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Analog Devices Inc. 
+ * Copyright (C) 2024 BayLibre, SAS + */ + +#ifndef __LINUX_SPI_OFFLOAD_TYPES_H +#define __LINUX_SPI_OFFLOAD_TYPES_H + +#include <linux/bits.h> +#include <linux/types.h> + +struct device; + +/* This is write xfer but TX uses external data stream rather than tx_buf. */ +#define SPI_OFFLOAD_XFER_TX_STREAM BIT(0) +/* This is read xfer but RX uses external data stream rather than rx_buf. */ +#define SPI_OFFLOAD_XFER_RX_STREAM BIT(1) + +/* Offload can be triggered by external hardware event. */ +#define SPI_OFFLOAD_CAP_TRIGGER BIT(0) +/* Offload can record and then play back TX data when triggered. */ +#define SPI_OFFLOAD_CAP_TX_STATIC_DATA BIT(1) +/* Offload can get TX data from an external stream source. */ +#define SPI_OFFLOAD_CAP_TX_STREAM_DMA BIT(2) +/* Offload can send RX data to an external stream sink. */ +#define SPI_OFFLOAD_CAP_RX_STREAM_DMA BIT(3) + +/** + * struct spi_offload_config - offload configuration + * + * This is used to request an offload with specific configuration. + */ +struct spi_offload_config { + /** @capability_flags: required capabilities. See %SPI_OFFLOAD_CAP_* */ + u32 capability_flags; +}; + +/** + * struct spi_offload - offload instance + */ +struct spi_offload { + /** @provider_dev: for get/put reference counting */ + struct device *provider_dev; + /** @priv: provider driver private data */ + void *priv; + /** @ops: callbacks for offload support */ + const struct spi_offload_ops *ops; + /** @xfer_flags: %SPI_OFFLOAD_XFER_* flags supported by provider */ + u32 xfer_flags; +}; + +enum spi_offload_trigger_type { + /* Indication from SPI peripheral that data is read to read. */ + SPI_OFFLOAD_TRIGGER_DATA_READY, + /* Trigger comes from a periodic source such as a clock. */ + SPI_OFFLOAD_TRIGGER_PERIODIC, +}; + +struct spi_offload_trigger_periodic { + u64 frequency_hz; +}; + +struct spi_offload_trigger_config { + /** @type: type discriminator for union */ + enum spi_offload_trigger_type type; + union { + struct spi_offload_trigger_periodic periodic; + }; +}; + +/** + * struct spi_offload_ops - callbacks implemented by offload providers + */ +struct spi_offload_ops { + /** + * @trigger_enable: Optional callback to enable the trigger for the + * given offload instance. + */ + int (*trigger_enable)(struct spi_offload *offload); + /** + * @trigger_disable: Optional callback to disable the trigger for the + * given offload instance. + */ + void (*trigger_disable)(struct spi_offload *offload); + /** + * @tx_stream_request_dma_chan: Optional callback for controllers that + * have an offload where the TX data stream is connected directly to a + * DMA channel. + */ + struct dma_chan *(*tx_stream_request_dma_chan)(struct spi_offload *offload); + /** + * @rx_stream_request_dma_chan: Optional callback for controllers that + * have an offload where the RX data stream is connected directly to a + * DMA channel. 
+ */ + struct dma_chan *(*rx_stream_request_dma_chan)(struct spi_offload *offload); +}; + +#endif /* __LINUX_SPI_OFFLOAD_TYPES_H */ diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index c46d2b8029be..c4830dfaff3d 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -15,16 +15,32 @@ #define SPI_MEM_OP_CMD(__opcode, __buswidth) \ { \ + .nbytes = 1, \ .buswidth = __buswidth, \ .opcode = __opcode, \ + } + +#define SPI_MEM_DTR_OP_CMD(__opcode, __buswidth) \ + { \ .nbytes = 1, \ + .opcode = __opcode, \ + .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth) \ { \ .nbytes = __nbytes, \ + .buswidth = __buswidth, \ + .val = __val, \ + } + +#define SPI_MEM_DTR_OP_ADDR(__nbytes, __val, __buswidth) \ + { \ + .nbytes = __nbytes, \ .val = __val, \ .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_NO_ADDR { } @@ -35,22 +51,47 @@ .buswidth = __buswidth, \ } +#define SPI_MEM_DTR_OP_DUMMY(__nbytes, __buswidth) \ + { \ + .nbytes = __nbytes, \ + .buswidth = __buswidth, \ + .dtr = true, \ + } + #define SPI_MEM_OP_NO_DUMMY { } #define SPI_MEM_OP_DATA_IN(__nbytes, __buf, __buswidth) \ { \ + .buswidth = __buswidth, \ + .dir = SPI_MEM_DATA_IN, \ + .nbytes = __nbytes, \ + .buf.in = __buf, \ + } + +#define SPI_MEM_DTR_OP_DATA_IN(__nbytes, __buf, __buswidth) \ + { \ .dir = SPI_MEM_DATA_IN, \ .nbytes = __nbytes, \ .buf.in = __buf, \ .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_DATA_OUT(__nbytes, __buf, __buswidth) \ { \ + .buswidth = __buswidth, \ + .dir = SPI_MEM_DATA_OUT, \ + .nbytes = __nbytes, \ + .buf.out = __buf, \ + } + +#define SPI_MEM_DTR_OP_DATA_OUT(__nbytes, __buf, __buswidth) \ + { \ .dir = SPI_MEM_DATA_OUT, \ .nbytes = __nbytes, \ .buf.out = __buf, \ .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_NO_DATA { } @@ -68,6 +109,9 @@ enum spi_mem_data_dir { SPI_MEM_DATA_OUT, }; +#define SPI_MEM_OP_MAX_FREQ(__freq) \ + .max_freq = __freq + /** * struct spi_mem_op - describes a SPI memory operation * @cmd.nbytes: number of opcode bytes (only 1 or 2 are valid). The opcode is @@ -97,6 +141,9 @@ enum spi_mem_data_dir { * operation does not involve transferring data * @data.buf.in: input buffer (must be DMA-able) * @data.buf.out: output buffer (must be DMA-able) + * @max_freq: frequency limitation wrt this operation. 0 means there is no + * specific constraint and the highest achievable frequency can be + * attempted. */ struct spi_mem_op { struct { @@ -135,14 +182,17 @@ struct spi_mem_op { const void *out; } buf; } data; + + unsigned int max_freq; }; -#define SPI_MEM_OP(__cmd, __addr, __dummy, __data) \ +#define SPI_MEM_OP(__cmd, __addr, __dummy, __data, ...) 
\ { \ .cmd = __cmd, \ .addr = __addr, \ .dummy = __dummy, \ .data = __data, \ + __VA_ARGS__ \ } /** @@ -302,11 +352,13 @@ struct spi_controller_mem_ops { * @ecc: Supports operations with error correction * @swap16: Supports swapping bytes on a 16 bit boundary when configured in * Octal DTR + * @per_op_freq: Supports per operation frequency switching */ struct spi_controller_mem_caps { bool dtr; bool ecc; bool swap16; + bool per_op_freq; }; #define spi_mem_controller_is_capable(ctlr, cap) \ @@ -371,6 +423,8 @@ bool spi_mem_default_supports_op(struct spi_mem *mem, #endif /* CONFIG_SPI_MEM */ int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op); +void spi_mem_adjust_op_freq(struct spi_mem *mem, struct spi_mem_op *op); +u64 spi_mem_calc_op_duration(struct spi_mem_op *op); bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 8497f4747e24..6e64f0193777 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -31,9 +31,11 @@ struct spi_transfer; struct spi_controller_mem_ops; struct spi_controller_mem_caps; struct spi_message; +struct spi_offload; +struct spi_offload_config; /* - * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, + * INTERFACES between SPI controller-side drivers and SPI target protocol handlers, * and SPI infrastructure. */ extern const struct bus_type spi_bus_type; @@ -128,7 +130,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_transfer *xfer); /** - * struct spi_device - Controller side proxy for an SPI slave device + * struct spi_device - Controller side proxy for an SPI target device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. * @max_speed_hz: Maximum clock rate to be used with this chip @@ -172,7 +174,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @pcpu_statistics: statistics for the spi_device * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array * - * A @spi_device is used to interchange data between an SPI slave + * A @spi_device is used to interchange data between an SPI target device * (usually a discrete chip) and CPU memory. * * In @dev, the platform_data is used to hold information about this @@ -247,10 +249,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(const struct device *dev) -{ - return dev ? container_of(dev, struct spi_device, dev) : NULL; -} +#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev) /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) @@ -386,15 +385,15 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch spi_unregister_driver) /** - * struct spi_controller - interface to SPI master or slave controller + * struct spi_controller - interface to SPI host or target controller * @dev: device interface to this driver * @list: link with the global spi_controller list * @bus_num: board-specific (and often SOC-specific) identifier for a * given SPI controller. * @num_chipselect: chipselects are used to distinguish individual - * SPI slaves, and are numbered from zero to num_chipselects. 
- * each slave has a chipselect signal, but it's common that not - * every chipselect is connected to a slave. + * SPI targets, and are numbered from zero to num_chipselects. + * each target has a chipselect signal, but it's common that not + * every chipselect is connected to a target. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @buswidth_override_bits: flags to override for this controller driver @@ -423,9 +422,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * must fail if an unrecognized or unsupported mode is requested. * It's always safe to call this unless transfers are pending on * the device whose settings are being modified. - * @set_cs_timing: optional hook for SPI devices to request SPI master + * @set_cs_timing: optional hook for SPI devices to request SPI * controller for configuring specific CS setup time, hold time and inactive - * delay interms of clock counts + * delay in terms of clock counts * @transfer: adds a message to the controller's transfer queue. * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA @@ -496,6 +495,10 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory like operations. + * @get_offload: callback for controllers with offload support to get matching + * offload instance. Implementations should return -ENODEV if no match is + * found. + * @put_offload: release the offload instance acquired by @get_offload. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @target_abort: abort the ongoing transfer request on an SPI target controller @@ -541,7 +544,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * * The driver for an SPI controller manages access to those devices through * a queue of spi_message transactions, copying data between CPU memory and - * an SPI slave device. For each such message it queues, it calls the + * an SPI target device. For each such message it queues, it calls the * message's completion function when the transaction completes. 
*/ struct spi_controller { @@ -591,7 +594,7 @@ struct spi_controller { #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ -#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select target device */ #define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* * The spi-controller has multi chip select capability and can @@ -658,7 +661,7 @@ struct spi_controller { * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, - * selecting a chip (for masters), then transferring data + * selecting a chip (for controllers), then transferring data * + If there are multiple spi_device children, the i/o queue * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) @@ -740,6 +743,10 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; + struct spi_offload *(*get_offload)(struct spi_device *spi, + const struct spi_offload_config *config); + void (*put_offload)(struct spi_offload *offload); + /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; @@ -822,7 +829,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, /* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, - unsigned int size, bool slave); + unsigned int size, bool target); static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) @@ -841,7 +848,7 @@ static inline struct spi_controller *spi_alloc_target(struct device *dev, struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, - bool slave); + bool target); static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) @@ -963,6 +970,8 @@ struct spi_res { * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use + * @offload_flags: Flags that are only applicable to specialized SPI offload + * transfers. See %SPI_OFFLOAD_XFER_* in spi-offload.h. * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset * within @tx_buf for which the SPI device is requesting that the time * snapshot for this transfer begins. Upon completing the SPI transfer, @@ -977,12 +986,12 @@ struct spi_res { * purposefully (instead of setting to spi_transfer->len - 1) to denote * that a transfer-level snapshot taken from within the driver may still * be of higher quality. - * @ptp_sts: Pointer to a memory location held by the SPI slave device where a + * @ptp_sts: Pointer to a memory location held by the SPI target device where a * PTP system timestamp structure may lie. If drivers use PIO or their * hardware has some sort of assist for retrieving exact transfer timing, * they can (and should) assert @ptp_sts_supported and populate this * structure using the ptp_read_system_*ts helper functions. - * The timestamp must represent the time at which the SPI slave device has + * The timestamp must represent the time at which the SPI target device has * processed the word, i.e. 
the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. @@ -1083,6 +1092,9 @@ struct spi_transfer { u32 effective_speed_hz; + /* Use %SPI_OFFLOAD_XFER_* from spi-offload.h */ + unsigned int offload_flags; + unsigned int ptp_sts_word_pre; unsigned int ptp_sts_word_post; @@ -1108,6 +1120,7 @@ struct spi_transfer { * @state: for use by whichever driver currently owns the message * @opt_state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed + * @offload: (optional) offload instance used by this message * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1168,6 +1181,12 @@ struct spi_message { */ void *opt_state; + /* + * Optional offload instance used by this message. This must be set + * by the peripheral driver before calling spi_optimize_message(). + */ + struct spi_offload *offload; + /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; @@ -1600,7 +1619,7 @@ struct spi_board_info { * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * - * chip_select reflects how this chip is wired to that master; + * chip_select reflects how this chip is wired to that controller; * it's less than num_chipselect. */ u16 bus_num; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 63dd8cf3c3c2..d3561c4a080e 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -548,6 +548,12 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1(raw_spinlock_bh, raw_spinlock_t, + raw_spin_lock_bh(_T->lock), + raw_spin_unlock_bh(_T->lock)) + +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_bh, _try, raw_spin_trylock_bh(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), @@ -569,6 +575,13 @@ DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, spin_trylock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1(spinlock_bh, spinlock_t, + spin_lock_bh(_T->lock), + spin_unlock_bh(_T->lock)) + +DEFINE_LOCK_GUARD_1_COND(spinlock_bh, _try, + spin_trylock_bh(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 33dcbec71925..51cab2def9ec 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -24,4 +24,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); extern bool no_hash_pointers; int no_hash_pointers_enable(char *str); +/* Used for Rust formatting ('%pA') */ +char *rust_fmt_argument(char *buf, char *end, const void *ptr); + #endif /* _LINUX_KERNEL_SPRINTF_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..900b0d5c05f5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,6 +43,18 @@ int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. 
*/ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +#define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast(). +#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ + SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above. +#define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) + // Flavors requiring synchronize_rcu() + // instead of smp_mb(). +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); + #ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h> #elif defined(CONFIG_TREE_SRCU) @@ -54,15 +66,6 @@ int init_srcu_struct(struct srcu_struct *ssp); void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); void cleanup_srcu_struct(struct srcu_struct *ssp); -int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); -#ifdef CONFIG_TINY_SRCU -#define __srcu_read_lock_lite __srcu_read_lock -#define __srcu_read_unlock_lite __srcu_read_unlock -#else // #ifdef CONFIG_TINY_SRCU -int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp); -#endif // #else // #ifdef CONFIG_TINY_SRCU void synchronize_srcu(struct srcu_struct *ssp); #define SRCU_GET_STATE_COMPLETED 0x1 @@ -232,13 +235,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * The return value from srcu_read_lock() must be passed unaltered - * to the matching srcu_read_unlock(). Note that srcu_read_lock() and - * the matching srcu_read_unlock() must occur in the same context, for - * example, it is illegal to invoke srcu_read_unlock() in an irq handler - * if the matching srcu_read_lock() was invoked in process context. Or, - * for that matter to invoke srcu_read_unlock() from one task and the - * matching srcu_read_lock() from another. + * The return value from srcu_read_lock() is guaranteed to be + * non-negative. This value must be passed unaltered to the matching + * srcu_read_unlock(). Note that srcu_read_lock() and the matching + * srcu_read_unlock() must occur in the same context, for example, it is + * illegal to invoke srcu_read_unlock() in an irq handler if the matching + * srcu_read_lock() was invoked in process context. Or, for that matter to + * invoke srcu_read_unlock() from one task and the matching srcu_read_lock() + * from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { @@ -251,6 +255,51 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) } /** + * srcu_read_lock_fast - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but for a light-weight + * smp_mb()-free reader. See srcu_read_lock() for more information. + * + * If srcu_read_lock_fast() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. Note that grace-period auto-expediting is disabled for _fast + * srcu_struct structures because auto-expedited grace periods invoke + * synchronize_rcu_expedited(), IPIs and all. 
+ * + * Note that srcu_read_lock_fast() can be invoked only from those contexts + * where RCU is watching, that is, from contexts where it would be legal + * to invoke rcu_read_lock(). Otherwise, lockdep will complain. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + rcu_try_lock_acquire(&ssp->dep_map); + return retval; +} + +/** + * srcu_down_read_fast - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter a semaphore-like SRCU read-side critical section, but for + * a light-weight smp_mb()-free reader. See srcu_read_lock_fast() and + * srcu_down_read() for more information. + * + * The same srcu_struct may be used concurrently by srcu_down_read_fast() + * and srcu_read_lock_fast(). + */ +static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) +{ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + return __srcu_read_lock_fast(ssp); +} + +/** * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. * @@ -271,7 +320,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_read_flavor_lite(ssp); + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_LITE); retval = __srcu_read_lock_lite(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; @@ -328,7 +377,8 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. * * Calls to srcu_down_read() may be nested, similar to the manner in - * which calls to down_read() may be nested. + * which calls to down_read() may be nested. The same srcu_struct may be + * used concurrently by srcu_down_read() and srcu_read_lock(). */ static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) { @@ -354,9 +404,40 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) } /** + * srcu_read_unlock_fast - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @scp: return value from corresponding srcu_read_lock_fast(). + * + * Exit a light-weight SRCU read-side critical section. + */ +static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + srcu_lock_release(&ssp->dep_map); + __srcu_read_unlock_fast(ssp, scp); +} + +/** + * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @scp: return value from corresponding srcu_read_lock_fast(). + * + * Exit an SRCU read-side critical section, but not necessarily from + * the same context as the maching srcu_down_read_fast(). + */ +static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases(ssp) +{ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + +/** * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. 
* @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). + * @idx: return value from corresponding srcu_read_lock_lite(). * * Exit a light-weight SRCU read-side critical section. */ @@ -372,7 +453,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) /** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). + * @idx: return value from corresponding srcu_read_lock_nmisafe(). * * Exit an SRCU read-side critical section, but in an NMI-safe manner. */ diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 1321da803274..380260317d98 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -64,13 +64,38 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - preempt_disable(); // Needed for PREEMPT_AUTO + preempt_disable(); // Needed for PREEMPT_LAZY idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); preempt_enable(); return idx; } +struct srcu_ctr; + +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) +{ + return (int)(intptr_t)(struct srcu_ctr __force __kernel *)scpp; +} + +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) +{ + return (struct srcu_ctr __percpu *)(intptr_t)idx; +} + +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +{ + return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp)); +} + +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +{ + __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp)); +} + +#define __srcu_read_lock_lite __srcu_read_lock +#define __srcu_read_unlock_lite __srcu_read_unlock + static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { synchronize_srcu(ssp); @@ -82,7 +107,7 @@ static inline void srcu_barrier(struct srcu_struct *ssp) } #define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) -#define srcu_check_read_flavor_lite(ssp) do { } while (0) +#define srcu_check_read_flavor_force(ssp, read_flavor) do { } while (0) /* Defined here to avoid size increase for non-torture kernels. */ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..8bed7e6cc4c1 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -17,15 +17,21 @@ struct srcu_node; struct srcu_struct; +/* One element of the srcu_data srcu_ctrs array. */ +struct srcu_ctr { + atomic_long_t srcu_locks; /* Locks per CPU. */ + atomic_long_t srcu_unlocks; /* Unlocks per CPU. */ +}; + /* * Per-CPU structure feeding into leaf srcu_node, similar in function * to rcu_node. */ struct srcu_data { /* Read-side state. */ - atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ - atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + struct srcu_ctr srcu_ctrs[2]; /* Locks and unlocks per CPU. */ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ + /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -43,11 +49,6 @@ struct srcu_data { struct srcu_struct *ssp; }; -/* Values for ->srcu_reader_flavor. 
*/ -#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). -#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). -#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). - /* * Node in SRCU combining tree, similar in function to rcu_data. */ @@ -99,7 +100,7 @@ struct srcu_usage { * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - unsigned int srcu_idx; /* Current rdr array element. */ + struct srcu_ctr __percpu *srcu_ctrp; struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ struct lockdep_map dep_map; struct srcu_usage *srcu_sup; /* Update-side data. */ @@ -166,6 +167,7 @@ struct srcu_struct { #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \ { \ .sda = &pcpu_name, \ + .srcu_ctrp = &pcpu_name.srcu_ctrs[0], \ __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ } @@ -205,10 +207,77 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) +int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); +// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that +// element's index. +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) +{ + return scpp - &ssp->sda->srcu_ctrs[0]; +} + +// Converts an integer to a per-CPU pointer to the corresponding +// ->srcu_ctrs[] array element. +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) +{ + return &ssp->sda->srcu_ctrs[idx]; +} + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Returns a pointer that must be passed to the matching + * srcu_read_unlock_fast(). + * + * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side + * critical sections either because they disables interrupts, because they + * are a single instruction, or because they are a read-modify-write atomic + * operation, depending on the whims of the architecture. + * + * This means that __srcu_read_lock_fast() is not all that fast + * on architectures that support NMIs but do not supply NMI-safe + * implementations of this_cpu_inc(). + */ +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +{ + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); + if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + this_cpu_inc(scp->srcu_locks.counter); /* Y */ + else + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ + barrier(); /* Avoid leaking the critical section. */ + return scp; +} + +/* + * Removes the count for the old reader from the appropriate + * per-CPU element of the srcu_struct. Note that this may well be a + * different CPU than that which was incremented by the corresponding + * srcu_read_lock_fast(), but it must be within the same task. + * + * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side + * critical sections either because they disables interrupts, because they + * are a single instruction, or because they are a read-modify-write atomic + * operation, depending on the whims of the architecture. 
+ * + * This means that __srcu_read_unlock_fast() is not all that fast + * on architectures that support NMIs but do not supply NMI-safe + * implementations of this_cpu_inc(). + */ +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +{ + barrier(); /* Avoid leaking the critical section. */ + if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + else + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); +} + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Returns an index that must be passed to the matching @@ -221,13 +290,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); */ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) { - int idx; + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); - idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); /* Y */ barrier(); /* Avoid leaking the critical section. */ - return idx; + return __srcu_ptr_to_ctr(ssp, scp); } /* @@ -244,22 +312,24 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) { barrier(); /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */ + this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); /* Z */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels. -static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) +// Record reader usage even for CONFIG_PROVE_RCU=n kernels. This is +// needed only for flavors that require grace-period smp_mb() calls to be +// promoted to synchronize_rcu(). +static inline void srcu_check_read_flavor_force(struct srcu_struct *ssp, int read_flavor) { struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); - if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) + if (likely(READ_ONCE(sdp->srcu_reader_flavor) & read_flavor)) return; - // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. - __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + // Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered. + __srcu_check_read_flavor(ssp, read_flavor); } // Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels. 
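The srcu.h/srcutree.h changes above introduce a _fast read-side flavor whose lock primitive returns a per-CPU srcu_ctr pointer instead of an integer index. A hedged reader sketch based on the signatures and comments shown above; my_srcu and my_data are illustrative names, not from the patch:

/* Example sketch only -- not part of the diff above. */
#include <linux/srcu.h>

DEFINE_SRCU(my_srcu);
static int my_data;

/* Must run where RCU is watching (e.g. ordinary process context). */
static int read_my_data(void)
{
	struct srcu_ctr __percpu *scp;
	int val;

	scp = srcu_read_lock_fast(&my_srcu);
	val = READ_ONCE(my_data);
	/* Unlock takes the pointer returned by the lock, not an int index. */
	srcu_read_unlock_fast(&my_srcu, scp);
	return val;
}

Note the caveat stated in the patch itself: once the _fast flavor is used on a given srcu_struct, none of the other read-side flavors may be mixed on that structure, before, during, or after.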
diff --git a/include/linux/stat.h b/include/linux/stat.h index 3d900c86981c..be7496a6a0dd 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -50,10 +50,11 @@ struct kstat { struct timespec64 btime; /* File creation time */ u64 blocks; u64 mnt_id; - u32 dio_mem_align; - u32 dio_offset_align; u64 change_cookie; u64 subvol; + u32 dio_mem_align; + u32 dio_offset_align; + u32 dio_read_offset_align; u32 atomic_write_unit_min; u32 atomic_write_unit_max; u32 atomic_write_segments_max; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d79ff252cfdc..c4ec8bb8144e 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -33,7 +33,9 @@ #define STMMAC_CSR_20_35M 0x2 /* MDC = clk_scr_i/16 */ #define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ -#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */ +#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */ +#define STMMAC_CSR_300_500M 0x6 /* MDC = clk_scr_i/204 */ +#define STMMAC_CSR_500_800M 0x7 /* MDC = clk_scr_i/324 */ /* MTL algorithms identifiers */ #define MTL_TX_ALGORITHM_WRR 0x0 @@ -76,6 +78,7 @@ | DMA_AXI_BLEN_32 | DMA_AXI_BLEN_64 \ | DMA_AXI_BLEN_128 | DMA_AXI_BLEN_256) +struct clk; struct stmmac_priv; /* Platfrom data for platform device structure's platform_data field */ @@ -180,7 +183,8 @@ struct dwmac4_addrs { #define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) #define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) #define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11) -#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(12) +#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(12) +#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13) struct plat_stmmacenet_data { int bus_id; @@ -229,11 +233,17 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, + phy_interface_t interface, int speed); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); void (*speed_mode_2500)(struct net_device *ndev, void *priv); + int (*mac_finish)(struct net_device *ndev, + void *priv, + unsigned int mode, + phy_interface_t interface); void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); @@ -250,8 +260,11 @@ struct plat_stmmacenet_data { struct clk *stmmac_clk; struct clk *pclk; struct clk *clk_ptp_ref; - unsigned int clk_ptp_rate; - unsigned int clk_ref_rate; + struct clk *clk_tx_i; /* clk_tx_i to MAC core */ + unsigned long clk_ptp_rate; + unsigned long clk_ref_rate; + struct clk_bulk_data *clks; + int num_clks; unsigned int mult_fact_100ns; s32 ptp_max_adj; u32 cdc_error_adj; @@ -263,7 +276,7 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; int has_xgmac; u8 vlan_fail_q; - unsigned int eee_usecs_rate; + unsigned long eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; int msi_mac_vec; diff --git a/include/linux/string.h b/include/linux/string.h index 493ac4862c77..01621ad0f598 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -4,6 +4,7 @@ #include <linux/args.h> #include <linux/array_size.h> +#include 
<linux/cleanup.h> /* for DEFINE_FREE() */ #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ @@ -312,6 +313,8 @@ extern void *kmemdup_array(const void *src, size_t count, size_t element_size, g extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); +DEFINE_FREE(argv_free, char **, if (!IS_ERR_OR_NULL(_T)) argv_free(_T)) + /* lib/cmdline.c */ extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); @@ -333,8 +336,8 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); #define sysfs_match_string(_a, _s) __sysfs_match_string(_a, ARRAY_SIZE(_a), _s) #ifdef CONFIG_BINARY_PRINTF -int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); +__printf(3, 0) int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); +__printf(3, 0) int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, @@ -411,8 +414,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem_pad(dest, src, pad) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_noncstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_cstr(src) + \ + __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ @@ -434,8 +440,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_noncstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_cstr(src) + \ + __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ @@ -453,8 +462,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_cstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_noncstr(src) + \ + __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ \ @@ -478,8 +490,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. 
*/ #define memtostr_pad(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_cstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_noncstr(src) + \ + __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ \ diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 120ca0f28e95..f3ba4f52ff26 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -41,23 +41,23 @@ static inline const char *str_high_low(bool v) } #define str_low_high(v) str_high_low(!(v)) -static inline const char *str_read_write(bool v) -{ - return v ? "read" : "write"; -} -#define str_write_read(v) str_read_write(!(v)) - static inline const char *str_on_off(bool v) { return v ? "on" : "off"; } #define str_off_on(v) str_on_off(!(v)) -static inline const char *str_yes_no(bool v) +static inline const char *str_read_write(bool v) { - return v ? "yes" : "no"; + return v ? "read" : "write"; } -#define str_no_yes(v) str_yes_no(!(v)) +#define str_write_read(v) str_read_write(!(v)) + +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) static inline const char *str_up_down(bool v) { @@ -65,11 +65,11 @@ static inline const char *str_up_down(bool v) } #define str_down_up(v) str_up_down(!(v)) -static inline const char *str_true_false(bool v) +static inline const char *str_yes_no(bool v) { - return v ? "true" : "false"; + return v ? "yes" : "no"; } -#define str_false_true(v) str_true_false(!(v)) +#define str_no_yes(v) str_yes_no(!(v)) /** * str_plural - Return the simple pluralization based on English counts diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 35766963dd14..e783132e481f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -222,6 +222,8 @@ static inline bool cache_is_expired(struct cache_detail *detail, struct cache_he return detail->flush_time >= h->last_refresh; } +extern int cache_check_rcu(struct cache_detail *detail, + struct cache_head *h, struct cache_req *rqstp); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5321585c778f..f8b406b0a1af 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -64,7 +64,9 @@ struct rpc_clnt { cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ cl_chatty : 1,/* be verbose */ - cl_shutdown : 1;/* rpc immediate -EIO */ + cl_shutdown : 1,/* rpc immediate -EIO */ + cl_netunreach_fatal : 1; + /* Treat ENETUNREACH errors as fatal */ struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -93,6 +95,7 @@ struct rpc_clnt { const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ struct super_block *pipefs_sb; + atomic_t cl_task_count; }; /* @@ -174,6 +177,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) #define RPC_CLNT_CREATE_CONNECTED (1UL << 12) +#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13) struct rpc_clnt *rpc_create(struct rpc_create_args 
*args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h deleted file mode 100644 index 3ccecd0ad229..000000000000 --- a/include/linux/sunrpc/gss_asn1.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/include/linux/sunrpc/gss_asn1.h - * - * minimal asn1 for generic encoding/decoding of gss tokens - * - * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, - * lib/gssapi/krb5/gssapiP_krb5.h, and others - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -/* - * Copyright 1995 by the Massachusetts Institute of Technology. - * All Rights Reserved. - * - * Export of this software from the United States of America may - * require a specific license from the United States Government. - * It is the responsibility of any person or organization contemplating - * export to obtain such a license before exporting. - * - * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and - * distribute this software and its documentation for any purpose and - * without fee is hereby granted, provided that the above copyright - * notice appear in all copies and that both that copyright notice and - * this permission notice appear in supporting documentation, and that - * the name of M.I.T. not be used in advertising or publicity pertaining - * to distribution of the software without specific, written prior - * permission. Furthermore if you modify this software you must label - * your software as modified software and not distribute it in such a - * fashion that it might be confused with the original M.I.T. software. - * M.I.T. makes no representations about the suitability of - * this software for any purpose. It is provided "as is" without express - * or implied warranty. 
- * - */ - - -#include <linux/sunrpc/gss_api.h> - -#define SIZEOF_INT 4 - -/* from gssapi_err_generic.h */ -#define G_BAD_SERVICE_NAME (-2045022976L) -#define G_BAD_STRING_UID (-2045022975L) -#define G_NOUSER (-2045022974L) -#define G_VALIDATE_FAILED (-2045022973L) -#define G_BUFFER_ALLOC (-2045022972L) -#define G_BAD_MSG_CTX (-2045022971L) -#define G_WRONG_SIZE (-2045022970L) -#define G_BAD_USAGE (-2045022969L) -#define G_UNKNOWN_QOP (-2045022968L) -#define G_NO_HOSTNAME (-2045022967L) -#define G_BAD_HOSTNAME (-2045022966L) -#define G_WRONG_MECH (-2045022965L) -#define G_BAD_TOK_HEADER (-2045022964L) -#define G_BAD_DIRECTION (-2045022963L) -#define G_TOK_TRUNC (-2045022962L) -#define G_REFLECT (-2045022961L) -#define G_WRONG_TOKID (-2045022960L) - -#define g_OID_equal(o1,o2) \ - (((o1)->len == (o2)->len) && \ - (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) - -u32 g_verify_token_header( - struct xdr_netobj *mech, - int *body_size, - unsigned char **buf_in, - int toksize); - -int g_token_size( - struct xdr_netobj *mech, - unsigned int body_size); - -void g_make_token_header( - struct xdr_netobj *mech, - int body_size, - unsigned char **buf); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 78a80bf3fdcb..43950b5237c8 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -40,7 +40,6 @@ #include <crypto/skcipher.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> -#include <linux/sunrpc/gss_asn1.h> /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index eac57914dcf3..ccba79ebf893 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -134,6 +134,7 @@ struct rpc_task_setup { #define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ +#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e68fecf6eab5..74658cca0f38 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -72,16 +72,12 @@ struct svc_serv { spinlock_t sv_lock; unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ - unsigned int sv_maxconn; /* max connections allowed or - * '0' causing max to be based - * on number of threads. */ - unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ - int sv_tmpcnt; /* count of temporary sockets */ + int sv_tmpcnt; /* count of temporary "valid" sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ char * sv_name; /* service name */ @@ -327,12 +323,7 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) */ static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) { - rqstp->rq_err = err; - /* memory barrier ensures assignment to error above is visible before - * waitqueue_active() test below completes. 
- */ - smp_mb(); - wake_up_var(&rqstp->rq_err); + store_release_wake_up(&rqstp->rq_err, err); if (err) kthread_exit(1); } diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0981e35a9fed..72be60952579 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -99,8 +99,30 @@ enum { XPT_HANDSHAKE, /* xprt requests a handshake */ XPT_TLS_SESSION, /* transport-layer security established */ XPT_PEER_AUTH, /* peer has been authenticated */ + XPT_PEER_VALID, /* peer has presented a filehandle that + * it has access to. It is NOT counted + * in ->sv_tmpcnt. + */ }; +/* + * Maximum number of "tmp" connections - those without XPT_PEER_VALID - + * permitted on any service. + */ +#define XPT_MAX_TMP_CONN 64 + +static inline void svc_xprt_set_valid(struct svc_xprt *xpt) +{ + if (test_bit(XPT_TEMP, &xpt->xpt_flags) && + !test_and_set_bit(XPT_PEER_VALID, &xpt->xpt_flags)) { + struct svc_serv *serv = xpt->xpt_server; + + spin_lock(&serv->sv_lock); + serv->sv_tmpcnt -= 1; + spin_unlock(&serv->sv_lock); + } +} + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h new file mode 100644 index 000000000000..cf21a14aa885 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/nfs4_1.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. */ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */ +/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */ + +#ifndef _LINUX_XDRGEN_NFS4_1_DEF_H +#define _LINUX_XDRGEN_NFS4_1_DEF_H + +#include <linux/types.h> +#include <linux/sunrpc/xdrgen/_defs.h> + +typedef s64 int64_t; + +typedef u32 uint32_t; + +typedef struct { + u32 count; + uint32_t *element; +} bitmap4; + +struct nfstime4 { + int64_t seconds; + uint32_t nseconds; +}; + +typedef bool fattr4_offline; + +enum { FATTR4_OFFLINE = 83 }; + +struct open_arguments4 { + bitmap4 oa_share_access; + bitmap4 oa_share_deny; + bitmap4 oa_share_access_want; + bitmap4 oa_open_claim; + bitmap4 oa_create_mode; +}; + +enum open_args_share_access4 { + OPEN_ARGS_SHARE_ACCESS_READ = 1, + OPEN_ARGS_SHARE_ACCESS_WRITE = 2, + OPEN_ARGS_SHARE_ACCESS_BOTH = 3, +}; +typedef enum open_args_share_access4 open_args_share_access4; + +enum open_args_share_deny4 { + OPEN_ARGS_SHARE_DENY_NONE = 0, + OPEN_ARGS_SHARE_DENY_READ = 1, + OPEN_ARGS_SHARE_DENY_WRITE = 2, + OPEN_ARGS_SHARE_DENY_BOTH = 3, +}; +typedef enum open_args_share_deny4 open_args_share_deny4; + +enum open_args_share_access_want4 { + OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG = 3, + OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG = 4, + OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL = 5, + OPEN_ARGS_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 17, + OPEN_ARGS_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 18, + OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 20, + OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 21, +}; +typedef enum open_args_share_access_want4 open_args_share_access_want4; + +enum open_args_open_claim4 { + OPEN_ARGS_OPEN_CLAIM_NULL = 0, + OPEN_ARGS_OPEN_CLAIM_PREVIOUS = 1, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR = 2, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV = 3, + OPEN_ARGS_OPEN_CLAIM_FH = 4, + OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH = 5, + OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH = 6, +}; +typedef enum open_args_open_claim4 open_args_open_claim4; + +enum open_args_createmode4 { + OPEN_ARGS_CREATEMODE_UNCHECKED4 = 0, + 
OPEN_ARGS_CREATE_MODE_GUARDED = 1, + OPEN_ARGS_CREATEMODE_EXCLUSIVE4 = 2, + OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1 = 3, +}; +typedef enum open_args_createmode4 open_args_createmode4; + +typedef struct open_arguments4 fattr4_open_arguments; + +enum { FATTR4_OPEN_ARGUMENTS = 86 }; + +enum { OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010 }; + +typedef struct nfstime4 fattr4_time_deleg_access; + +typedef struct nfstime4 fattr4_time_deleg_modify; + +enum { FATTR4_TIME_DELEG_ACCESS = 84 }; + +enum { FATTR4_TIME_DELEG_MODIFY = 85 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_MASK = 0xFF00 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE = 0x0000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_READ_DELEG = 0x0100 }; + +enum { OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG = 0x0200 }; + +enum { OPEN4_SHARE_ACCESS_WANT_ANY_DELEG = 0x0300 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_DELEG = 0x0400 }; + +enum { OPEN4_SHARE_ACCESS_WANT_CANCEL = 0x0500 }; + +enum { OPEN4_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 0x10000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 0x20000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 }; + +enum open_delegation_type4 { + OPEN_DELEGATE_NONE = 0, + OPEN_DELEGATE_READ = 1, + OPEN_DELEGATE_WRITE = 2, + OPEN_DELEGATE_NONE_EXT = 3, + OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, +}; +typedef enum open_delegation_type4 open_delegation_type4; + +#define NFS4_int64_t_sz \ + (XDR_hyper) +#define NFS4_uint32_t_sz \ + (XDR_unsigned_int) +#define NFS4_bitmap4_sz (XDR_unsigned_int) +#define NFS4_nfstime4_sz \ + (NFS4_int64_t_sz + NFS4_uint32_t_sz) +#define NFS4_fattr4_offline_sz \ + (XDR_bool) +#define NFS4_open_arguments4_sz \ + (NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz) +#define NFS4_open_args_share_access4_sz (XDR_int) +#define NFS4_open_args_share_deny4_sz (XDR_int) +#define NFS4_open_args_share_access_want4_sz (XDR_int) +#define NFS4_open_args_open_claim4_sz (XDR_int) +#define NFS4_open_args_createmode4_sz (XDR_int) +#define NFS4_fattr4_open_arguments_sz \ + (NFS4_open_arguments4_sz) +#define NFS4_fattr4_time_deleg_access_sz \ + (NFS4_nfstime4_sz) +#define NFS4_fattr4_time_deleg_modify_sz \ + (NFS4_nfstime4_sz) +#define NFS4_open_delegation_type4_sz (XDR_int) + +#endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index c0514c684b2c..e4db5022fe92 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -56,6 +56,7 @@ extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt); extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline); +extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps); extern void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); @@ -75,7 +76,6 @@ extern struct rpc_xprt_switch *xprt_iter_xchg_switch( struct rpc_xprt_switch *newswitch); extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); -extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, diff --git a/include/linux/swap.h b/include/linux/swap.h index f3e0ac20c2e8..db46b25a65ae 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -24,7 
+24,6 @@ struct pagevec; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff -#define SWAP_FLAG_PRIO_SHIFT 0 #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ #define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ @@ -74,14 +73,13 @@ static inline int current_is_kswapd(void) * to a special SWP_DEVICE_{READ|WRITE} entry. * * When a page is mapped by the device for exclusive access we set the CPU page - * table entries to special SWP_DEVICE_EXCLUSIVE_* entries. + * table entries to a special SWP_DEVICE_EXCLUSIVE entry. */ #ifdef CONFIG_DEVICE_PRIVATE -#define SWP_DEVICE_NUM 4 +#define SWP_DEVICE_NUM 3 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) -#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) -#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3) +#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) #else #define SWP_DEVICE_NUM 0 #endif @@ -219,10 +217,10 @@ enum { SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ /* add others here before... */ - SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL +#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ @@ -257,18 +255,27 @@ struct swap_cluster_info { u8 order; struct list_head list; }; -#define CLUSTER_FLAG_FREE 1 /* This cluster is free */ -#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ -#define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ -#define CLUSTER_FLAG_FULL 8 /* This cluster is on full list */ + +/* All on-list cluster must have a non-zero flag. */ +enum swap_cluster_flags { + CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ + CLUSTER_FLAG_FREE, + CLUSTER_FLAG_NONFULL, + CLUSTER_FLAG_FRAG, + /* Clusters with flags above are allocatable */ + CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, + CLUSTER_FLAG_FULL, + CLUSTER_FLAG_DISCARD, + CLUSTER_FLAG_MAX, +}; /* * The first page in the swap file is the swap header, which is always marked * bad to prevent it from being allocated as an entry. This also prevents the * cluster to which it belongs being marked free. Therefore 0 is safe to use as - * a sentinel to indicate next is not valid in percpu_cluster. + * a sentinel to indicate an entry is not valid. */ -#define SWAP_NEXT_INVALID 0 +#define SWAP_ENTRY_INVALID 0 #ifdef CONFIG_THP_SWAP #define SWAP_NR_ORDERS (PMD_ORDER + 1) @@ -277,11 +284,10 @@ struct swap_cluster_info { #endif /* - * We assign a cluster to each CPU, so each CPU can allocate swap entry from - * its own cluster and swapout sequentially. The purpose is to optimize swapout - * throughput. + * We keep using same cluster for rotational device so IO will be sequential. + * The purpose is to optimize SWAP throughput on these device. 
*/ -struct percpu_cluster { +struct swap_sequential_cluster { unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; @@ -304,15 +310,11 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ - unsigned int frag_cluster_nr[SWAP_NR_ORDERS]; - unsigned int lowest_bit; /* index of first free in swap_map */ - unsigned int highest_bit; /* index of last free in swap_map */ + atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ - unsigned int inuse_pages; /* number of those currently in use */ - unsigned int cluster_next; /* likely index for next allocation */ - unsigned int cluster_nr; /* countdown to next cluster search */ - unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ - struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ + atomic_long_t inuse_pages; /* number of those currently in use */ + struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */ + spinlock_t global_cluster_lock; /* Serialize usage of global cluster */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ @@ -426,19 +428,10 @@ extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_NUMA -extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; -#else -#define node_reclaim_mode 0 #endif -static inline bool node_reclaim_enabled(void) -{ - /* Is any node_reclaim_mode bit set? */ - return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); -} - void check_move_unevictable_folios(struct folio_batch *fbatch); extern void __meminit kswapd_run(int nid); @@ -463,7 +456,6 @@ void free_pages_and_swap_cache(struct encoded_page **, int); extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; -extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. 
*/ static inline bool vm_swap_full(void) @@ -477,24 +469,22 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -swp_entry_t folio_alloc_swap(struct folio *folio); +int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); -extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); -extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); -extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); +extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; @@ -577,9 +567,9 @@ static inline int __swap_count(swp_entry_t entry) return 0; } -static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +static inline bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry) { - return 0; + return false; } static inline int swp_swapcount(swp_entry_t entry) @@ -587,11 +577,9 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -static inline swp_entry_t folio_alloc_swap(struct folio *folio) +static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask) { - swp_entry_t entry; - entry.val = 0; - return entry; + return -EINVAL; } static inline bool folio_free_swap(struct folio *folio) @@ -652,7 +640,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) #endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) @@ -673,10 +660,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct folio *folio); #else -static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) -{ -} - static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index ae73a87775b3..91cdf12190a0 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -6,10 +6,8 @@ #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, - unsigned short old, unsigned short new); -extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, - unsigned int nr_ents); +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); +extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int 
swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); @@ -17,8 +15,12 @@ extern void swap_cgroup_swapoff(int type); #else static inline -unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, - unsigned int nr_ents) +void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent) +{ +} + +static inline +unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) { return 0; } diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h deleted file mode 100644 index 15adfb8c813a..000000000000 --- a/include/linux/swap_slots.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SWAP_SLOTS_H -#define _LINUX_SWAP_SLOTS_H - -#include <linux/swap.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> - -#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH -#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE) -#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE) - -struct swap_slots_cache { - bool lock_initialized; - struct mutex alloc_lock; /* protects slots, nr, cur */ - swp_entry_t *slots; - int nr; - int cur; - spinlock_t free_lock; /* protects slots_ret, n_ret */ - swp_entry_t *slots_ret; - int n_ret; -}; - -void disable_swap_slots_cache_lock(void); -void reenable_swap_slots_cache_unlock(void); -void enable_swap_slots_cache(void); -void free_swap_slot(swp_entry_t entry); - -extern bool swap_slot_cache_enabled; - -#endif /* _LINUX_SWAP_SLOTS_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 96f26e29fefe..64ea151a7ae3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -186,26 +186,16 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } -static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) +static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { - return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset); -} - -static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) -{ - return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset); + return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } static inline bool is_device_exclusive_entry(swp_entry_t entry) { - return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ || - swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE; + return swp_type(entry) == SWP_DEVICE_EXCLUSIVE; } -static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE); -} #else /* CONFIG_DEVICE_PRIVATE */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -227,12 +217,7 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry) return false; } -static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) -{ - return swp_entry(0, 0); -} - -static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) +static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } @@ -242,10 +227,6 @@ static inline bool is_device_exclusive_entry(swp_entry_t entry) return false; } -static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) -{ - return false; -} #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c6333204d451..e5603cc91963 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ 
-951,6 +951,10 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); +asmlinkage long sys_open_tree_attr(int dfd, const char __user *path, + unsigned flags, + struct mount_attr __user *uattr, + size_t usize); asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, int to_dfd, const char __user *to_path, unsigned int ms_flags); @@ -1266,14 +1270,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, AT_SYMLINK_NOFOLLOW); } -extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); +int do_sys_ftruncate(unsigned int fd, loff_t length, int small); static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } -extern long do_sys_truncate(const char __user *pathname, loff_t length); +int do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 0f2fcd244523..18f7e1fd093c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -293,7 +293,7 @@ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .bin_attrs = _name##_attrs, \ + .bin_attrs_new = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) @@ -511,7 +511,7 @@ __printf(3, 4) int sysfs_emit_at(char *buf, int at, const char *fmt, ...); ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count); #else /* CONFIG_SYSFS */ @@ -774,7 +774,7 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h deleted file mode 100644 index 5cf77dbb8d86..000000000000 --- a/include/linux/sysv_fs.h +++ /dev/null @@ -1,214 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SYSV_FS_H -#define _LINUX_SYSV_FS_H - -#define __packed2__ __attribute__((packed, aligned(2))) - - -#ifndef __KERNEL__ -typedef u16 __fs16; -typedef u32 __fs16; -#endif - -/* inode numbers are 16 bit */ -typedef __fs16 sysv_ino_t; - -/* Block numbers are 24 bit, sometimes stored in 32 bit. - On Coherent FS, they are always stored in PDP-11 manner: the least - significant 16 bits come last. 
*/ -typedef __fs32 sysv_zone_t; - -/* 0 is non-existent */ -#define SYSV_BADBL_INO 1 /* inode of bad blocks file */ -#define SYSV_ROOT_INO 2 /* inode of root directory */ - - -/* Xenix super-block data on disk */ -#define XENIX_NICINOD 100 /* number of inode cache entries */ -#define XENIX_NICFREE 100 /* number of free block list chunk entries */ -struct xenix_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= XENIX_NICFREE */ - sysv_zone_t s_free[XENIX_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= XENIX_NICINOD */ - sysv_ino_t s_inode[XENIX_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - __fs16 s_dinfo[4]; /* device information ?? */ - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - char s_clean; /* set to 0x46 when filesystem is properly unmounted */ - char s_fill[371]; - s32 s_magic; /* version of file system */ - __fs32 s_type; /* type of file system: 1 for 512 byte blocks - 2 for 1024 byte blocks - 3 for 2048 byte blocks */ - -}; - -/* - * SystemV FS comes in two variants: - * sysv2: System V Release 2 (e.g. Microport), structure elements aligned(2). - * sysv4: System V Release 4 (e.g. Consensys), structure elements aligned(4). - */ -#define SYSV_NICINOD 100 /* number of inode cache entries */ -#define SYSV_NICFREE 50 /* number of free block list chunk entries */ - -/* SystemV4 super-block data on disk */ -struct sysv4_super_block { - __fs16 s_isize; /* index of first data zone */ - u16 s_pad0; - __fs32 s_fsize; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */ - u16 s_pad1; - sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */ - u16 s_pad2; - sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time; /* time of last super block update */ - __fs16 s_dinfo[4]; /* device information ?? 
*/ - __fs32 s_tfree; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - u16 s_pad3; - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - s32 s_fill[12]; - __fs32 s_state; /* file system state: 0x7c269d38-s_time means clean */ - s32 s_magic; /* version of file system */ - __fs32 s_type; /* type of file system: 1 for 512 byte blocks - 2 for 1024 byte blocks */ -}; - -/* SystemV2 super-block data on disk */ -struct sysv2_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */ - sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */ - sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - __fs16 s_dinfo[4]; /* device information ?? */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - s32 s_fill[14]; - __fs32 s_state; /* file system state: 0xcb096f43 means clean */ - s32 s_magic; /* version of file system */ - __fs32 s_type; /* type of file system: 1 for 512 byte blocks - 2 for 1024 byte blocks */ -}; - -/* V7 super-block data on disk */ -#define V7_NICINOD 100 /* number of inode cache entries */ -#define V7_NICFREE 50 /* number of free block list chunk entries */ -struct v7_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= V7_NICFREE */ - sysv_zone_t s_free[V7_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= V7_NICINOD */ - sysv_ino_t s_inode[V7_NICINOD]; /* some free inodes */ - /* locks, not used by Linux or V7: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - /* the following fields are not maintained by V7: */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - __fs16 s_m; /* interleave factor */ - __fs16 s_n; /* interleave factor */ - char s_fname[6]; /* file system name */ - char s_fpack[6]; /* file system pack name */ -}; -/* Constants to aid sanity checking */ -/* This is not a hard limit, nor enforced by v7 kernel. It's actually just - * the limit used by Seventh Edition's ls, though is high enough to assume - * that no reasonable file system would have that much entries in root - * directory. Thus, if we see anything higher, we just probably got the - * endiannes wrong. 
*/ -#define V7_NFILES 1024 -/* The disk addresses are three-byte (despite direct block addresses being - * aligned word-wise in inode). If the most significant byte is non-zero, - * something is most likely wrong (not a filesystem, bad bytesex). */ -#define V7_MAXSIZE 0x00ffffff - -/* Coherent super-block data on disk */ -#define COH_NICINOD 100 /* number of inode cache entries */ -#define COH_NICFREE 64 /* number of free block list chunk entries */ -struct coh_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= COH_NICFREE */ - sysv_zone_t s_free[COH_NICFREE] __packed2__; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= COH_NICINOD */ - sysv_ino_t s_inode[COH_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - __fs16 s_interleave_m; /* interleave factor */ - __fs16 s_interleave_n; - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - __fs32 s_unique; /* zero, not used */ -}; - -/* SystemV/Coherent inode data on disk */ -struct sysv_inode { - __fs16 i_mode; - __fs16 i_nlink; - __fs16 i_uid; - __fs16 i_gid; - __fs32 i_size; - u8 i_data[3*(10+1+1+1)]; - u8 i_gen; - __fs32 i_atime; /* time of last access */ - __fs32 i_mtime; /* time of last modification */ - __fs32 i_ctime; /* time of creation */ -}; - -/* SystemV/Coherent directory entry on disk */ -#define SYSV_NAMELEN 14 /* max size of name in struct sysv_dir_entry */ -struct sysv_dir_entry { - sysv_ino_t inode; - char name[SYSV_NAMELEN]; /* up to 14 characters, the rest are zeroes */ -}; - -#define SYSV_DIRSIZE sizeof(struct sysv_dir_entry) /* size of every directory entry */ - -#endif /* _LINUX_SYSV_FS_H */ diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 2964171856e0..0646804860ff 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -19,9 +19,6 @@ enum task_work_notify_mode { TWA_SIGNAL, TWA_SIGNAL_NO_IPI, TWA_NMI_CURRENT, - - TWA_FLAGS = 0xff00, - TWAF_NO_ALLOC = 0x0100, }; static inline bool task_work_pending(struct task_struct *task) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f88daaa76d83..5c7c5038d47b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -160,6 +160,8 @@ struct tcp_request_sock { u32 rcv_isn; u32 snt_isn; u32 ts_off; + u32 snt_tsval_first; + u32 snt_tsval_last; u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# @@ -242,6 +244,9 @@ struct tcp_sock { struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. 
*/ struct rb_root out_of_order_queue; +#if defined(CONFIG_TLS_DEVICE) + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); +#endif u32 snd_ssthresh; /* Slow start size threshold */ u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); @@ -335,7 +340,7 @@ struct tcp_sock { } rcv_rtt_est; /* Receiver queue space */ struct { - u32 space; + int space; u32 seq; u64 time; } rcvq_space; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 754802478b96..0b5ed6821080 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -86,8 +86,6 @@ struct thermal_trip { #define THERMAL_TRIP_PRIV_TO_INT(_val_) (uintptr_t)(_val_) #define THERMAL_INT_TO_TRIP_PRIV(_val_) (void *)(uintptr_t)(_val_) -struct thermal_zone_device; - struct cooling_spec { unsigned long upper; /* Highest cooling state */ unsigned long lower; /* Lowest cooling state */ @@ -295,6 +293,10 @@ static inline struct thermal_zone_device *thermal_tripless_zone_device_register( static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } +static inline void thermal_zone_device_update(struct thermal_zone_device *tz, + enum thermal_notify_event event) +{ } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index cf2446c9c30d..dd925d84fa46 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -217,54 +217,6 @@ static inline int arch_within_stack_frames(const void * const stack, } #endif -#ifdef CONFIG_HARDENED_USERCOPY -extern void __check_object_size(const void *ptr, unsigned long n, - bool to_user); - -static __always_inline void check_object_size(const void *ptr, unsigned long n, - bool to_user) -{ - if (!__builtin_constant_p(n)) - __check_object_size(ptr, n, to_user); -} -#else -static inline void check_object_size(const void *ptr, unsigned long n, - bool to_user) -{ } -#endif /* CONFIG_HARDENED_USERCOPY */ - -extern void __compiletime_error("copy source size is too small") -__bad_copy_from(void); -extern void __compiletime_error("copy destination size is too small") -__bad_copy_to(void); - -void __copy_overflow(int size, unsigned long count); - -static inline void copy_overflow(int size, unsigned long count) -{ - if (IS_ENABLED(CONFIG_BUG)) - __copy_overflow(size, count); -} - -static __always_inline __must_check bool -check_copy_size(const void *addr, size_t bytes, bool is_source) -{ - int sz = __builtin_object_size(addr, 0); - if (unlikely(sz >= 0 && sz < bytes)) { - if (!__builtin_constant_p(bytes)) - copy_overflow(sz, bytes); - else if (is_source) - __bad_copy_from(); - else - __bad_copy_to(); - return false; - } - if (WARN_ON_ONCE(bytes > INT_MAX)) - return false; - check_object_size(addr, bytes, is_source); - return true; -} - #ifndef arch_setup_new_exec static inline void arch_setup_new_exec(void) { } #endif diff --git a/include/linux/time64.h b/include/linux/time64.h index f1bcea8c124a..9934331c7b86 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -49,6 +49,11 @@ static inline int timespec64_equal(const struct timespec64 *a, return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } +static inline bool timespec64_is_epoch(const struct timespec64 *ts) +{ + return ts->tv_sec == 0 && ts->tv_nsec == 0; +} + /* * lhs < rhs: return <0 * lhs == rhs: return 0 diff --git a/include/linux/time_namespace.h 
b/include/linux/time_namespace.h index 876e31b4461d..0b8b32bf0655 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -165,6 +165,4 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #endif -struct vdso_data *arch_get_vdso_data(void *vvar_page); - #endif /* _LINUX_TIMENS_H */ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b19..785048a3b3e6 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0e035f675efe..542773650200 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -264,18 +264,6 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); /** - * struct ktime_timestamps - Simultaneous mono/boot/real timestamps - * @mono: Monotonic timestamp - * @boot: Boottime timestamp - * @real: Realtime timestamp - */ -struct ktime_timestamps { - u64 mono; - u64 boot; - u64 real; -}; - -/** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value * @cycles: Clocksource counter value to produce the system times @@ -345,9 +333,6 @@ extern int get_device_system_crosststamp( */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); -/* NMI safe mono/boot/realtime timestamps */ -extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap); - /* * Persistent clock related interfaces */ diff --git a/include/linux/timer.h b/include/linux/timer.h index e67ecd1cbc97..10596d7c3a34 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -30,7 +30,7 @@ * * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and * it's safe to wait for the completion of the running instance from - * IRQ handlers, for example, by calling del_timer_sync(). + * IRQ handlers, for example, by calling timer_delete_sync(). 
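For readers tracking the API rename referenced here (the legacy del_timer_sync()/del_timer() wrappers are removed by the timer.h hunk just below), a small hedged sketch of the substitution in a driver teardown path; the demo_dev structure and demo_dev_stop() are illustrative only:

#include <linux/timer.h>

struct demo_dev {
	struct timer_list watchdog;	/* hypothetical per-device timer */
};

static void demo_dev_stop(struct demo_dev *dd)
{
	/* Was: del_timer_sync(&dd->watchdog); */
	timer_delete_sync(&dd->watchdog);	/* waits for a running callback to finish */
}

Where the timer will never be re-armed again, timer_shutdown_sync() is the stronger choice, since it also prevents future rearming.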
* * Note: The irq disabled callback execution is a special case for * workqueue locking issues. It's not meant for executing random crap @@ -168,40 +168,6 @@ extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); -/** - * del_timer_sync - Delete a pending timer and wait for a running callback - * @timer: The timer to be deleted - * - * See timer_delete_sync() for detailed explanation. - * - * Do not use in new code. Use timer_delete_sync() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer_sync(struct timer_list *timer) -{ - return timer_delete_sync(timer); -} - -/** - * del_timer - Delete a pending timer - * @timer: The timer to be deleted - * - * See timer_delete() for detailed explanation. - * - * Do not use in new code. Use timer_delete() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer(struct timer_list *timer) -{ - return timer_delete(timer); -} - extern void init_timers(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); diff --git a/include/linux/topology.h b/include/linux/topology.h index 52f5850730b3..24e715f0f6d2 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -240,6 +240,29 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif +#ifndef topology_is_primary_thread + +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + /* + * When disabling SMT, the primary thread of the SMT will remain + * enabled/active. Architectures that have a special primary thread + * (e.g. x86) need to override this function. Otherwise the first + * thread in the SMT can be made the primary thread. + * + * The sibling cpumask of an offline CPU always contains the CPU + * itself on architectures using the implementation of + * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology. + * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for + * building their topology have to check whether to use this default + * implementation or to override it. + */ + return cpu == cpumask_first(topology_sibling_cpumask(cpu)); +} +#define topology_is_primary_thread topology_is_primary_thread + +#endif + static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); @@ -262,6 +285,36 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) #endif /* CONFIG_NUMA */ /** + * for_each_node_numadist() - iterate over nodes in increasing distance + * order, starting from a given node + * @node: the iteration variable and the starting node. + * @unvisited: a nodemask to keep track of the unvisited nodes. + * + * This macro iterates over NUMA node IDs in increasing distance from the + * starting @node and yields MAX_NUMNODES when all the nodes have been + * visited. + * + * Note that by the time the loop completes, the @unvisited nodemask will + * be fully cleared, unless the loop exits early. + * + * The difference between for_each_node() and for_each_node_numadist() is + * that the former allows to iterate over nodes in numerical order, whereas + * the latter iterates over nodes in increasing order of distance. + * + * This complexity of this iterator is O(N^2), where N represents the + * number of nodes, as each iteration involves scanning all nodes to + * find the one with the shortest distance. 
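As a hedged illustration of the iterator documented above (the nearest_node_nodemask() helper the macro body relies on is not shown in this excerpt, and the function and node budget below are made up): seed @unvisited with the nodes of interest, hold the RCU read lock, and the loop yields node IDs in increasing distance from the start node.

#include <linux/nodemask.h>
#include <linux/rcupdate.h>
#include <linux/topology.h>

static unsigned int demo_count_nearby_nodes(int start_node, unsigned int budget)
{
	nodemask_t unvisited = node_states[N_MEMORY];	/* nodes still to visit */
	unsigned int visited = 0;
	int nid = start_node;

	rcu_read_lock();
	for_each_node_numadist(nid, unvisited) {
		/* nid walks memory nodes from nearest to farthest. */
		if (++visited >= budget)
			break;
	}
	rcu_read_unlock();
	return visited;
}

If the loop is not cut short by the budget, @unvisited ends up fully cleared, matching the note above about early exits.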
+ * + * Requires rcu_lock to be held. + */ +#define for_each_node_numadist(node, unvisited) \ + for (int __start = (node), \ + (node) = nearest_node_nodemask((__start), &(unvisited)); \ + (node) < MAX_NUMNODES; \ + node_clear((node), (unvisited)), \ + (node) = nearest_node_nodemask((__start), &(unvisited))) + +/** * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance * from a given node. * @mask: the iteration variable. diff --git a/include/linux/torture.h b/include/linux/torture.h index c2e979f82f8d..1b59056c3b18 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -104,6 +104,7 @@ int torture_stutter_init(int s, int sgap); /* Initialization and cleanup. */ bool torture_init_begin(char *ttype, int v); void torture_init_end(void); +unsigned long get_torture_init_jiffies(void); bool torture_cleanup_begin(void); void torture_cleanup_end(void); bool torture_must_stop(void); @@ -130,7 +131,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #endif #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); #endif #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 20a40ade8030..a3d8305e88a5 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -335,6 +336,7 @@ enum tpm2_cc_attrs { #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A #define TPM_VID_ATML 0x1114 +#define TPM_VID_IFX 0x15D1 enum tpm_chip_flags { TPM_CHIP_FLAG_BOOTSTRAPPED = BIT(0), @@ -436,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. + */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); diff --git a/include/linux/trace.h b/include/linux/trace.h index fdcd76b7be83..7eaad857dee0 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -72,8 +72,8 @@ static inline int unregister_ftrace_export(struct trace_export *export) static inline void trace_printk_init_buffers(void) { } -static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, - const char *fmt, ...) +static inline __printf(3, 4) +int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) 
{ return 0; } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 58ad4ead33fc..fa9cf4292dff 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -673,6 +673,20 @@ struct trace_event_file { atomic_t tm_ref; /* trigger-mode reference counter */ }; +#ifdef CONFIG_HIST_TRIGGERS +extern struct irq_work hist_poll_work; +extern wait_queue_head_t hist_poll_wq; + +static inline void hist_poll_wakeup(void) +{ + if (wq_has_sleeper(&hist_poll_wq)) + irq_work_queue(&hist_poll_work); +} + +#define hist_poll_wait(file, wait) \ + poll_wait(file, &hist_poll_wq, wait) +#endif + #define __TRACE_EVENT_FLAGS(name, value) \ static int __init trace_init_flags_##name(void) \ { \ @@ -845,24 +859,6 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, const char *event, bool enable); -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) #ifdef CONFIG_PERF_EVENTS struct perf_event; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 1ef95c0287f0..a93ed5ac3226 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -88,8 +88,8 @@ extern __printf(2, 3) void trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern __printf(2, 0) void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); -extern void -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern __printf(2, 0) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int trace_print_seq(struct seq_file *m, struct trace_seq *s); extern int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt); @@ -113,8 +113,8 @@ static inline __printf(2, 3) void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
{ } -static inline void -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +static inline __printf(2, 0) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { } diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 76d9055b2cff..a351763e6965 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -218,7 +218,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DEFINE_RUST_DO_TRACE(name, proto, args) \ notrace void rust_do_trace_##name(proto) \ { \ - __rust_do_trace_##name(args); \ + __do_trace_##name(args); \ } /* @@ -268,7 +268,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ - static inline void __rust_do_trace_##name(proto) \ + static inline void __do_trace_##name(proto) \ { \ if (cond) { \ guard(preempt_notrace)(); \ @@ -277,12 +277,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } \ static inline void trace_##name(proto) \ { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) { \ - if (cond) { \ - guard(preempt_notrace)(); \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - } \ - } \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ @@ -291,7 +287,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ - static inline void __rust_do_trace_##name(proto) \ + static inline void __do_trace_##name(proto) \ { \ guard(rcu_tasks_trace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ @@ -299,10 +295,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline void trace_##name(proto) \ { \ might_fault(); \ - if (static_branch_unlikely(&__tracepoint_##name.key)) { \ - guard(rcu_tasks_trace)(); \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - } \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ diff --git a/include/linux/tty.h b/include/linux/tty.h index 2372f9357240..0a46e4054dec 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -239,7 +239,6 @@ struct tty_struct { struct list_head tty_files; -#define N_TTY_BUF_SIZE 4096 struct work_struct SAK_work; } __randomize_layout; @@ -251,7 +250,7 @@ struct tty_file_private { }; /** - * DOC: TTY Struct Flags + * enum tty_struct_flags - TTY Struct Flags * * These bits are used in the :c:member:`tty_struct.flags` field. * @@ -260,62 +259,64 @@ struct tty_file_private { * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. * - * TTY_THROTTLED + * @TTY_THROTTLED: * Driver input is throttled. The ldisc should call * :c:member:`tty_driver.unthrottle()` in order to resume reception when * it is ready to process more data (at threshold min). * - * TTY_IO_ERROR + * @TTY_IO_ERROR: * If set, causes all subsequent userspace read/write calls on the tty to * fail, returning -%EIO. (May be no ldisc too.) 
* - * TTY_OTHER_CLOSED + * @TTY_OTHER_CLOSED: * Device is a pty and the other side has closed. * - * TTY_EXCLUSIVE + * @TTY_EXCLUSIVE: * Exclusive open mode (a single opener). * - * TTY_DO_WRITE_WAKEUP + * @TTY_DO_WRITE_WAKEUP: * If set, causes the driver to call the * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume * transmission when it can accept more data to transmit. * - * TTY_LDISC_OPEN + * @TTY_LDISC_OPEN: * Indicates that a line discipline is open. For debugging purposes only. * - * TTY_PTY_LOCK + * @TTY_PTY_LOCK: * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. * - * TTY_NO_WRITE_SPLIT + * @TTY_NO_WRITE_SPLIT: * Prevent driver from splitting up writes into smaller chunks (preserve * write boundaries to driver). * - * TTY_HUPPED + * @TTY_HUPPED: * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. * - * TTY_HUPPING + * @TTY_HUPPING: * The TTY is in the process of hanging up to abort potential readers. * - * TTY_LDISC_CHANGING + * @TTY_LDISC_CHANGING: * Line discipline for this TTY is being changed. I/O should not block * when this is set. Use tty_io_nonblock() to check. * - * TTY_LDISC_HALTED + * @TTY_LDISC_HALTED: * Line discipline for this TTY was stopped. No work should be queued to * this ldisc. */ -#define TTY_THROTTLED 0 -#define TTY_IO_ERROR 1 -#define TTY_OTHER_CLOSED 2 -#define TTY_EXCLUSIVE 3 -#define TTY_DO_WRITE_WAKEUP 5 -#define TTY_LDISC_OPEN 11 -#define TTY_PTY_LOCK 16 -#define TTY_NO_WRITE_SPLIT 17 -#define TTY_HUPPED 18 -#define TTY_HUPPING 19 -#define TTY_LDISC_CHANGING 20 -#define TTY_LDISC_HALTED 22 +enum tty_struct_flags { + TTY_THROTTLED, + TTY_IO_ERROR, + TTY_OTHER_CLOSED, + TTY_EXCLUSIVE, + TTY_DO_WRITE_WAKEUP, + TTY_LDISC_OPEN, + TTY_PTY_LOCK, + TTY_NO_WRITE_SPLIT, + TTY_HUPPED, + TTY_HUPPING, + TTY_LDISC_CHANGING, + TTY_LDISC_HALTED, +}; static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) {
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index dd4b31ce6d5d..188ee9b768eb 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -17,6 +17,92 @@ struct serial_icounter_struct; struct serial_struct; /** + * enum tty_driver_flag -- TTY Driver Flags + * + * These are flags passed to tty_alloc_driver(). + * + * @TTY_DRIVER_INSTALLED: + * Whether this driver was successfully installed. This is a tty internal + * flag. Do not touch. + * + * @TTY_DRIVER_RESET_TERMIOS: + * Requests the tty layer to reset the termios setting when the last + * process has closed the device. Used for PTYs, in particular. + * + * @TTY_DRIVER_REAL_RAW: + * Indicates that the driver will guarantee not to set any special + * character handling flags if this is set for the tty: + * + * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` + * + * That is, if there is no reason for the driver to + * send notifications of parity and break characters up to the line + * driver, it won't do so. This allows the line driver to optimize for + * this case if this flag is set. (Note that there is also a promise, if + * the above case is true, not to signal overruns, either.) + * + * @TTY_DRIVER_DYNAMIC_DEV: + * The individual tty devices need to be registered with a call to + * tty_register_device() when the device is found in the system and + * unregistered with a call to tty_unregister_device() so the devices will + * show up properly in sysfs.
If not set, all &tty_driver.num entries + * will be created by the tty core in sysfs when tty_register_driver() is + * called. This is to be used by drivers that have tty devices that can + * appear and disappear while the main tty driver is registered with the + * tty core. + * + * @TTY_DRIVER_DEVPTS_MEM: + * Don't use the standard arrays (&tty_driver.ttys and + * &tty_driver.termios), instead use dynamic memory keyed through the + * devpts filesystem. This is only applicable to the PTY driver. + * + * @TTY_DRIVER_HARDWARE_BREAK: + * Hardware handles break signals. Pass the requested timeout to the + * &tty_operations.break_ctl instead of using a simple on/off interface. + * + * @TTY_DRIVER_DYNAMIC_ALLOC: + * Do not allocate structures which are needed per line for this driver + * (&tty_driver.ports) as it would waste memory. The driver will take + * care. This is only applicable to the PTY driver. + * + * @TTY_DRIVER_UNNUMBERED_NODE: + * Do not create numbered ``/dev`` nodes. For example, create + * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a + * driver for a single tty device is being allocated. + */ +enum tty_driver_flag { + TTY_DRIVER_INSTALLED = BIT(0), + TTY_DRIVER_RESET_TERMIOS = BIT(1), + TTY_DRIVER_REAL_RAW = BIT(2), + TTY_DRIVER_DYNAMIC_DEV = BIT(3), + TTY_DRIVER_DEVPTS_MEM = BIT(4), + TTY_DRIVER_HARDWARE_BREAK = BIT(5), + TTY_DRIVER_DYNAMIC_ALLOC = BIT(6), + TTY_DRIVER_UNNUMBERED_NODE = BIT(7), +}; + +enum tty_driver_type { + TTY_DRIVER_TYPE_SYSTEM, + TTY_DRIVER_TYPE_CONSOLE, + TTY_DRIVER_TYPE_SERIAL, + TTY_DRIVER_TYPE_PTY, + TTY_DRIVER_TYPE_SCC, + TTY_DRIVER_TYPE_SYSCONS, +}; + +enum tty_driver_subtype { + SYSTEM_TYPE_TTY = 1, + SYSTEM_TYPE_CONSOLE, + SYSTEM_TYPE_SYSCONS, + SYSTEM_TYPE_SYSPTMX, + + PTY_TYPE_MASTER = 1, + PTY_TYPE_SLAVE, + + SERIAL_TYPE_NORMAL = 1, +}; + +/** * struct tty_operations -- interface between driver and tty * * @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *, @@ -320,7 +406,7 @@ struct serial_struct; * * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)`` * - * kgdboc support (Documentation/dev-tools/kgdb.rst). This routine is + * kgdboc support (Documentation/process/debugging/kgdb.rst). This routine is * called to initialize the HW for later use by calling @poll_get_char or * @poll_put_char. * @@ -414,8 +500,8 @@ struct tty_operations { * @major: major /dev device number (zero for autoassignment) * @minor_start: the first minor /dev device number * @num: number of devices allocated - * @type: type of tty driver (%TTY_DRIVER_TYPE_) - * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_) + * @type: type of tty driver (enum tty_driver_type) + * @subtype: subtype of tty driver (enum tty_driver_subtype) * @init_termios: termios to set to each tty initially (e.g. 
%tty_std_termios) * @flags: tty driver flags (%TTY_DRIVER_) * @proc_entry: proc fs entry, used internally @@ -447,8 +533,8 @@ struct tty_driver { int major; int minor_start; unsigned int num; - short type; - short subtype; + enum tty_driver_type type; + enum tty_driver_subtype subtype; struct ktermios init_termios; unsigned long flags; struct proc_dir_entry *proc_entry; @@ -478,7 +564,13 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line); void tty_driver_kref_put(struct tty_driver *driver); -/* Use TTY_DRIVER_* flags below */ +/** + * tty_alloc_driver - allocate tty driver + * @lines: count of lines this driver can handle at most + * @flags: some of enum tty_driver_flag, will be set in driver->flags + * + * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends). + */ #define tty_alloc_driver(lines, flags) \ __tty_alloc_driver(lines, THIS_MODULE, flags) @@ -494,84 +586,6 @@ static inline void tty_set_operations(struct tty_driver *driver, driver->ops = op; } -/** - * DOC: TTY Driver Flags - * - * TTY_DRIVER_RESET_TERMIOS - * Requests the tty layer to reset the termios setting when the last - * process has closed the device. Used for PTYs, in particular. - * - * TTY_DRIVER_REAL_RAW - * Indicates that the driver will guarantee not to set any special - * character handling flags if this is set for the tty: - * - * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` - * - * That is, if there is no reason for the driver to - * send notifications of parity and break characters up to the line - * driver, it won't do so. This allows the line driver to optimize for - * this case if this flag is set. (Note that there is also a promise, if - * the above case is true, not to signal overruns, either.) - * - * TTY_DRIVER_DYNAMIC_DEV - * The individual tty devices need to be registered with a call to - * tty_register_device() when the device is found in the system and - * unregistered with a call to tty_unregister_device() so the devices will - * be show up properly in sysfs. If not set, all &tty_driver.num entries - * will be created by the tty core in sysfs when tty_register_driver() is - * called. This is to be used by drivers that have tty devices that can - * appear and disappear while the main tty driver is registered with the - * tty core. - * - * TTY_DRIVER_DEVPTS_MEM - * Don't use the standard arrays (&tty_driver.ttys and - * &tty_driver.termios), instead use dynamic memory keyed through the - * devpts filesystem. This is only applicable to the PTY driver. - * - * TTY_DRIVER_HARDWARE_BREAK - * Hardware handles break signals. Pass the requested timeout to the - * &tty_operations.break_ctl instead of using a simple on/off interface. - * - * TTY_DRIVER_DYNAMIC_ALLOC - * Do not allocate structures which are needed per line for this driver - * (&tty_driver.ports) as it would waste memory. The driver will take - * care. This is only applicable to the PTY driver. - * - * TTY_DRIVER_UNNUMBERED_NODE - * Do not create numbered ``/dev`` nodes. For example, create - * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a - * driver for a single tty device is being allocated. 
- */ -#define TTY_DRIVER_INSTALLED 0x0001 -#define TTY_DRIVER_RESET_TERMIOS 0x0002 -#define TTY_DRIVER_REAL_RAW 0x0004 -#define TTY_DRIVER_DYNAMIC_DEV 0x0008 -#define TTY_DRIVER_DEVPTS_MEM 0x0010 -#define TTY_DRIVER_HARDWARE_BREAK 0x0020 -#define TTY_DRIVER_DYNAMIC_ALLOC 0x0040 -#define TTY_DRIVER_UNNUMBERED_NODE 0x0080 - -/* tty driver types */ -#define TTY_DRIVER_TYPE_SYSTEM 0x0001 -#define TTY_DRIVER_TYPE_CONSOLE 0x0002 -#define TTY_DRIVER_TYPE_SERIAL 0x0003 -#define TTY_DRIVER_TYPE_PTY 0x0004 -#define TTY_DRIVER_TYPE_SCC 0x0005 /* scc driver */ -#define TTY_DRIVER_TYPE_SYSCONS 0x0006 - -/* system subtypes (magic, used by tty_io.c) */ -#define SYSTEM_TYPE_TTY 0x0001 -#define SYSTEM_TYPE_CONSOLE 0x0002 -#define SYSTEM_TYPE_SYSCONS 0x0003 -#define SYSTEM_TYPE_SYSPTMX 0x0004 - -/* pty subtypes (magic, used by tty_io.c) */ -#define PTY_TYPE_MASTER 0x0001 -#define PTY_TYPE_SLAVE 0x0002 - -/* serial subtype definitions */ -#define SERIAL_TYPE_NORMAL 1 - int tty_register_driver(struct tty_driver *driver); void tty_unregister_driver(struct tty_driver *driver); struct device *tty_register_device(struct tty_driver *driver, unsigned index, diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index af01e89074b2..c5cccc3fc1e8 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -39,7 +39,6 @@ do { \ int ldsem_down_read(struct ld_semaphore *sem, long timeout); int ldsem_down_read_trylock(struct ld_semaphore *sem); int ldsem_down_write(struct ld_semaphore *sem, long timeout); -int ldsem_down_write_trylock(struct ld_semaphore *sem); void ldsem_up_read(struct ld_semaphore *sem); void ldsem_up_write(struct ld_semaphore *sem); diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h index 2da8cbeb158a..38b45ab00053 100644 --- a/include/linux/turris-omnia-mcu-interface.h +++ b/include/linux/turris-omnia-mcu-interface.h @@ -9,7 +9,10 @@ #define __TURRIS_OMNIA_MCU_INTERFACE_H #include <linux/bitfield.h> -#include <linux/bits.h> +#include <linux/bitops.h> +#include <linux/types.h> +#include <linux/unaligned.h> +#include <asm/byteorder.h> enum omnia_commands_e { OMNIA_CMD_GET_STATUS_WORD = 0x01, /* slave sends status word back */ @@ -236,6 +239,20 @@ enum omnia_int_e { OMNIA_INT_LAN5_LED1 = BIT(31), }; +enum omnia_cmd_led_mode_e { + OMNIA_CMD_LED_MODE_LED_MASK = GENMASK(3, 0), + OMNIA_CMD_LED_MODE_USER = BIT(4), +}; + +#define OMNIA_CMD_LED_MODE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_MODE_LED_MASK, _l) + +enum omnia_cmd_led_state_e { + OMNIA_CMD_LED_STATE_LED_MASK = GENMASK(3, 0), + OMNIA_CMD_LED_STATE_ON = BIT(4), +}; + +#define OMNIA_CMD_LED_STATE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_STATE_LED_MASK, _l) + enum omnia_cmd_poweroff_e { OMNIA_CMD_POWER_OFF_POWERON_BUTTON = BIT(0), OMNIA_CMD_POWER_OFF_MAGIC = 0xdead, @@ -246,4 +263,135 @@ enum omnia_cmd_usb_ovc_prot_e { OMNIA_CMD_xET_USB_OVC_PROT_ENABLE = BIT(4), }; +/* Command execution functions */ + +struct i2c_client; + +int omnia_cmd_write_read(const struct i2c_client *client, + void *cmd, unsigned int cmd_len, + void *reply, unsigned int reply_len); + +static inline int omnia_cmd_write(const struct i2c_client *client, void *cmd, + unsigned int len) +{ + return omnia_cmd_write_read(client, cmd, len, NULL, 0); +} + +static inline int omnia_cmd_write_u8(const struct i2c_client *client, u8 cmd, + u8 val) +{ + u8 buf[2] = { cmd, val }; + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_write_u16(const struct i2c_client *client, u8 cmd, + u16 val) +{ + u8 
buf[3]; + + buf[0] = cmd; + put_unaligned_le16(val, &buf[1]); + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_write_u32(const struct i2c_client *client, u8 cmd, + u32 val) +{ + u8 buf[5]; + + buf[0] = cmd; + put_unaligned_le32(val, &buf[1]); + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_read(const struct i2c_client *client, u8 cmd, + void *reply, unsigned int len) +{ + return omnia_cmd_write_read(client, &cmd, 1, reply, len); +} + +static inline unsigned int +omnia_compute_reply_length(unsigned long mask, bool interleaved, + unsigned int offset) +{ + if (!mask) + return 0; + + return ((__fls(mask) >> 3) << interleaved) + 1 + offset; +} + +/* Returns 0 on success */ +static inline int omnia_cmd_read_bits(const struct i2c_client *client, u8 cmd, + unsigned long bits, unsigned long *dst) +{ + __le32 reply; + int err; + + if (!bits) { + *dst = 0; + return 0; + } + + err = omnia_cmd_read(client, cmd, &reply, + omnia_compute_reply_length(bits, false, 0)); + if (err) + return err; + + *dst = le32_to_cpu(reply) & bits; + + return 0; +} + +static inline int omnia_cmd_read_bit(const struct i2c_client *client, u8 cmd, + unsigned long bit) +{ + unsigned long reply; + int err; + + err = omnia_cmd_read_bits(client, cmd, bit, &reply); + if (err) + return err; + + return !!reply; +} + +static inline int omnia_cmd_read_u32(const struct i2c_client *client, u8 cmd, + u32 *dst) +{ + __le32 reply; + int err; + + err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); + if (err) + return err; + + *dst = le32_to_cpu(reply); + + return 0; +} + +static inline int omnia_cmd_read_u16(const struct i2c_client *client, u8 cmd, + u16 *dst) +{ + __le16 reply; + int err; + + err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); + if (err) + return err; + + *dst = le16_to_cpu(reply); + + return 0; +} + +static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, + u8 *reply) +{ + return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); +} + #endif /* __TURRIS_OMNIA_MCU_INTERFACE_H */ diff --git a/include/linux/types.h b/include/linux/types.h index 2d7b9ae8714c..49b79c8bb1a9 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -43,7 +43,7 @@ typedef unsigned long uintptr_t; typedef long intptr_t; #ifdef CONFIG_HAVE_UID16 -/* This is defined by include/asm-{arch}/posix_types.h */ +/* This is defined by arch/{arch}/include/asm/posix_types.h */ typedef __kernel_old_uid_t old_uid_t; typedef __kernel_old_gid_t old_gid_t; #endif /* CONFIG_UID16 */ @@ -92,6 +92,7 @@ typedef unsigned char unchar; typedef unsigned short ushort; typedef unsigned int uint; typedef unsigned long ulong; +typedef unsigned long long ullong; #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ @@ -248,5 +249,17 @@ typedef void (*swap_func_t)(void *a, void *b, int size); typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv); typedef int (*cmp_func_t)(const void *a, const void *b); +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner. + * + * The only time @task is non-nil is when a user is blocked (or + * checking if it needs to) on a condition, and reset as soon as we + * know that the condition has succeeded and are awoken. 
+ */ +struct rcuwait { + struct task_struct __rcu *task; +}; + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index e9c702c1908d..7c06f4795670 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -7,7 +7,7 @@ #include <linux/minmax.h> #include <linux/nospec.h> #include <linux/sched.h> -#include <linux/thread_info.h> +#include <linux/ucopysize.h> #include <asm/uaccess.h> diff --git a/include/linux/ucopysize.h b/include/linux/ucopysize.h new file mode 100644 index 000000000000..41c2d9720466 --- /dev/null +++ b/include/linux/ucopysize.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Perform sanity checking for object sizes for uaccess.h and uio.h. */ +#ifndef __LINUX_UCOPYSIZE_H__ +#define __LINUX_UCOPYSIZE_H__ + +#include <linux/bug.h> + +#ifdef CONFIG_HARDENED_USERCOPY +#include <linux/jump_label.h> +extern void __check_object_size(const void *ptr, unsigned long n, + bool to_user); + +DECLARE_STATIC_KEY_MAYBE(CONFIG_HARDENED_USERCOPY_DEFAULT_ON, + validate_usercopy_range); + +static __always_inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ + if (!__builtin_constant_p(n) && + static_branch_maybe(CONFIG_HARDENED_USERCOPY_DEFAULT_ON, + &validate_usercopy_range)) { + __check_object_size(ptr, n, to_user); + } +} +#else +static inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ } +#endif /* CONFIG_HARDENED_USERCOPY */ + +extern void __compiletime_error("copy source size is too small") +__bad_copy_from(void); +extern void __compiletime_error("copy destination size is too small") +__bad_copy_to(void); + +void __copy_overflow(int size, unsigned long count); + +static inline void copy_overflow(int size, unsigned long count) +{ + if (IS_ENABLED(CONFIG_BUG)) + __copy_overflow(size, count); +} + +static __always_inline __must_check bool +check_copy_size(const void *addr, size_t bytes, bool is_source) +{ + int sz = __builtin_object_size(addr, 0); + if (unlikely(sz >= 0 && sz < bytes)) { + if (!__builtin_constant_p(bytes)) + copy_overflow(sz, bytes); + else if (is_source) + __bad_copy_from(); + else + __bad_copy_to(); + return false; + } + if (WARN_ON_ONCE(bytes > INT_MAX)) + return false; + check_object_size(addr, bytes, is_source); + return true; +} + +#endif /* __LINUX_UCOPYSIZE_H__ */ diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index f85ec5613721..2dc767e08f54 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) u32 map_id_down(struct uid_gid_map *map, u32 id); u32 map_id_up(struct uid_gid_map *map, u32 id); +u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count); #else @@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id) return id; } +static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) +{ + return id; +} + static inline u32 map_id_up(struct uid_gid_map *map, u32 id) { return id; diff --git a/include/linux/uio.h b/include/linux/uio.h index 853f9de5aa05..49ece9e1888f 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -6,8 +6,8 @@ #define __LINUX_UIO_H #include <linux/kernel.h> -#include <linux/thread_info.h> #include <linux/mm_types.h> +#include <linux/ucopysize.h> #include <uapi/linux/uio.h> struct page; @@ -82,6 +82,15 @@ struct iov_iter { }; }; +typedef __u16 uio_meta_flags_t; + +struct uio_meta { + uio_meta_flags_t flags; + u16 app_tag; 
+ u64 seed; + struct iov_iter iter; +}; + static inline const struct iovec *iter_iov(const struct iov_iter *iter) { if (iter->iter_type == ITER_UBUF) diff --git a/include/linux/unroll.h b/include/linux/unroll.h index d42fd6366373..863fb69f6a7e 100644 --- a/include/linux/unroll.h +++ b/include/linux/unroll.h @@ -9,6 +9,50 @@ #include <linux/args.h> +#ifdef CONFIG_CC_IS_CLANG +#define __pick_unrolled(x, y) _Pragma(#x) +#elif CONFIG_GCC_VERSION >= 80000 +#define __pick_unrolled(x, y) _Pragma(#y) +#else +#define __pick_unrolled(x, y) /* not supported */ +#endif + +/** + * unrolled - loop attributes to ask the compiler to unroll it + * + * Usage: + * + * #define BATCH 8 + * + * unrolled_count(BATCH) + * for (u32 i = 0; i < BATCH; i++) + * // loop body without cross-iteration dependencies + * + * This is only a hint and the compiler is free to disable unrolling if it + * thinks the count is suboptimal and may hurt performance and/or hugely + * increase object code size. + * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends + * on what iter x will do with variables) is not a strict requirement, but + * provides best performance and object code size. + * Available only on Clang and GCC 8.x onwards. + */ + +/* Ask the compiler to pick an optimal unroll count, Clang only */ +#define unrolled \ + __pick_unrolled(clang loop unroll(enable), /* nothing */) + +/* Unroll each @n iterations of the loop */ +#define unrolled_count(n) \ + __pick_unrolled(clang loop unroll_count(n), GCC unroll n) + +/* Unroll the whole loop */ +#define unrolled_full \ + __pick_unrolled(clang loop unroll(full), GCC unroll 65534) + +/* Never unroll the loop */ +#define unrolled_none \ + __pick_unrolled(clang loop unroll(disable), GCC unroll 1) + #define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) #define __UNROLL_0(MACRO, args...) 
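As a side note on the unroll.h hunk above: a minimal sketch of how a caller might apply the new unrolled_count() hint. The function, array, and BATCH names below are illustrative only and are not part of the patch; on toolchains without pragma support the hint expands to nothing and the loop compiles unchanged.

#include <linux/types.h>
#include <linux/unroll.h>

#define BATCH 8

/* Sum a fixed-size batch of counters; iterations are independent, so the
 * compiler is free to honour the unroll hint without reordering hazards.
 */
static u32 sum_batch(const u32 vals[BATCH])
{
	u32 sum = 0;

	unrolled_count(BATCH)
	for (u32 i = 0; i < BATCH; i++)
		sum += vals[i];

	return sum;
}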
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e0a4c2082245..2e46b69ff0a6 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/timer.h> +#include <linux/seqlock.h> struct uprobe; struct vm_area_struct; @@ -38,6 +39,8 @@ struct page; #define MAX_URETPROBE_DEPTH 64 +#define UPROBE_NO_TRAMPOLINE_VADDR (~0UL) + struct uprobe_consumer { /* * handler() can return UPROBE_HANDLER_REMOVE to signal the need to @@ -124,6 +127,10 @@ struct uprobe_task { unsigned int depth; struct return_instance *return_instances; + struct return_instance *ri_pool; + struct timer_list ri_timer; + seqcount_t ri_seqcount; + union { struct { struct arch_uprobe_task autask; @@ -137,8 +144,8 @@ struct uprobe_task { }; struct uprobe *active_uprobe; - struct timer_list ri_timer; unsigned long xol_vaddr; + bool signal_denied; struct arch_uprobe *auprobe; }; @@ -154,12 +161,18 @@ struct return_instance { unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ - int consumers_cnt; + int cons_cnt; /* total number of session consumers */ struct return_instance *next; /* keep as stack */ struct rcu_head rcu; - struct return_consumer consumers[] __counted_by(consumers_cnt); + /* singular pre-allocated return_consumer instance for common case */ + struct return_consumer consumer; + /* + * extra return_consumer instances for rare cases of multiple session consumers, + * contains (cons_cnt - 1) elements + */ + struct return_consumer *extra_consumers; } ____cacheline_aligned; enum rp_check { diff --git a/include/linux/usb.h b/include/linux/usb.h index cfa8005e24f9..c63a7a9191ca 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -51,6 +51,7 @@ struct ep_device; * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder * @ss_ep_comp: SuperSpeed companion descriptor for this endpoint * @ssp_isoc_ep_comp: SuperSpeedPlus isoc companion descriptor for this endpoint + * @eusb2_isoc_ep_comp: eUSB2 isoc companion descriptor for this endpoint * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) * with one or more transfer descriptors (TDs) per urb @@ -64,9 +65,10 @@ struct ep_device; * descriptor within an active interface in a given USB configuration. 
*/ struct usb_host_endpoint { - struct usb_endpoint_descriptor desc; - struct usb_ss_ep_comp_descriptor ss_ep_comp; - struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; + struct usb_endpoint_descriptor desc; + struct usb_ss_ep_comp_descriptor ss_ep_comp; + struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; + struct usb_eusb2_isoc_ep_comp_descriptor eusb2_isoc_ep_comp; struct list_head urb_list; void *hcpriv; struct ep_device *ep_dev; /* For sysfs info */ @@ -612,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -722,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; diff --git a/include/linux/usb/mctp-usb.h b/include/linux/usb/mctp-usb.h new file mode 100644 index 000000000000..a2f6f1e04efb --- /dev/null +++ b/include/linux/usb/mctp-usb.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mctp-usb.h - MCTP USB transport binding: common definitions, + * based on DMTF0283 specification: + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0283_1.0.1.pdf + * + * These are protocol-level definitions, that may be shared between host + * and gadget drivers. + * + * Copyright (C) 2024-2025 Code Construct Pty Ltd + */ + +#ifndef __LINUX_USB_MCTP_USB_H +#define __LINUX_USB_MCTP_USB_H + +#include <linux/types.h> + +struct mctp_usb_hdr { + __be16 id; + u8 rsvd; + u8 len; +} __packed; + +#define MCTP_USB_XFER_SIZE 512 +#define MCTP_USB_BTU 68 +#define MCTP_USB_MTU_MIN MCTP_USB_BTU +#define MCTP_USB_MTU_MAX (U8_MAX - sizeof(struct mctp_usb_hdr)) +#define MCTP_USB_DMTF_ID 0x1ab4 + +#endif /* __LINUX_USB_MCTP_USB_H */ diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 3963e55e88a3..fbdef950f06c 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -61,7 +61,7 @@ struct musb_hdrc_eps_bits { }; struct musb_hdrc_config { - struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */ + const struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */ unsigned fifo_cfg_size; /* size of the fifo configuration */ /* MUSB configuration-specific details */ diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index d50098fb16b5..3068c3084eb6 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -33,7 +33,9 @@ enum pd_ctrl_msg_type { PD_CTRL_FR_SWAP = 19, PD_CTRL_GET_PPS_STATUS = 20, PD_CTRL_GET_COUNTRY_CODES = 21, - /* 22-31 Reserved */ + /* 22-23 Reserved */ + PD_CTRL_GET_REVISION = 24, + /* 25-31 Reserved */ }; enum pd_data_msg_type { @@ -46,7 +48,9 @@ enum pd_data_msg_type { PD_DATA_ALERT = 6, PD_DATA_GET_COUNTRY_INFO = 7, PD_DATA_ENTER_USB = 8, - /* 9-14 Reserved */ + /* 9-11 Reserved */ + PD_DATA_REVISION = 12, + /* 13-14 Reserved */ PD_DATA_VENDOR_DEF = 15, /* 16-31 Reserved */ }; @@ -453,6 +457,20 @@ static inline unsigned int rdo_max_power(u32 rdo) #define EUDO_TBT_SUPPORT BIT(14) #define EUDO_HOST_PRESENT BIT(13) +/* + * Request Message Data Object (PD Revision 3.1+ only) + * -------- + * <31:28> :: Revision Major + * <27:24> :: Revision Minor + * <23:20> :: Version Major + * <19:16> :: Version 
Minor + * <15:0> :: Reserved, Shall be set to zero + */ + +#define RMDO(rev_maj, rev_min, ver_maj, ver_min) \ + (((rev_maj) & 0xf) << 28 | ((rev_min) & 0xf) << 24 | \ + ((ver_maj) & 0xf) << 20 | ((ver_min) & 0xf) << 16) + /* USB PD timers and counters */ #define PD_T_NO_RESPONSE 5000 /* 4.5 - 5.5 seconds */ #define PD_T_DB_DETECT 10000 /* 10 - 15 seconds */ diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index e4de6bc1f69b..0fa9885a1038 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -223,7 +223,6 @@ extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, struct device_node *node, struct notifier_block *nb); extern void usb_put_phy(struct usb_phy *); -extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x); extern void usb_phy_set_event(struct usb_phy *x, unsigned long event); extern void usb_phy_set_charger_current(struct usb_phy *usb_phy, unsigned int mA); @@ -259,10 +258,6 @@ static inline void usb_put_phy(struct usb_phy *x) { } -static inline void devm_usb_put_phy(struct device *dev, struct usb_phy *x) -{ -} - static inline void usb_phy_set_event(struct usb_phy *x, unsigned long event) { } diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 33a4c146dc19..2ca60828f28b 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index 8539956bc2be..51be3bb8fccb 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -82,4 +82,12 @@ struct bulk_cs_wrap { #define US_BULK_RESET_REQUEST 0xff #define US_BULK_GET_MAX_LUN 0xfe +/* + * If 4 LUNs are supported then the LUNs would be + * numbered from 0 to 3, and the return value for + * US_BULK_GET_MAX_LUN request would be 3. The valid + * LUN field is 4 bits wide, the upper limit is 0x0f. + */ +#define US_BULK_MAX_LUN_LIMIT 0x0f + #endif diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index d616b8807000..252af3f77039 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -140,6 +140,7 @@ int typec_cable_set_identity(struct typec_cable *cable); * @mode: Index of the Mode * @vdo: VDO returned by Discover Modes USB PD command * @roles: Only for ports. DRP if the mode is available in both roles + * @inactive: Only for ports. Make this port inactive (default is active). * * Description of an Alternate Mode which a connector, cable plug or partner * supports. 
@@ -150,6 +151,7 @@ struct typec_altmode_desc { u32 vdo; /* Only used with ports */ enum typec_port_data roles; + bool inactive; }; void typec_partner_set_pd_revision(struct typec_partner *partner, u16 pd_revision); diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index fa97d7e00f5c..55dcea12082c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -44,6 +44,7 @@ struct typec_thunderbolt_data { #define TBT_GEN3_NON_ROUNDED 0 #define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 +#define TBT_CABLE_ROUNDED BIT(19) #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 5050f502c1ed..4b651065738a 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -49,19 +49,10 @@ /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_ULPI) -struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags); - struct usb_phy *devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, unsigned int flags); #else -static inline struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags) -{ - return NULL; -} - static inline struct usb_phy *devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, unsigned int flags) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 7183e5aca282..a0bb6d012137 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,8 +5,10 @@ #include <linux/kref.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> +#include <linux/rculist_nulls.h> #include <linux/sched.h> #include <linux/workqueue.h> +#include <linux/rcuref.h> #include <linux/rwsem.h> #include <linux/sysctl.h> #include <linux/err.h> @@ -115,10 +117,11 @@ struct user_namespace { } __randomize_layout; struct ucounts { - struct hlist_node node; + struct hlist_nulls_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + struct rcu_head rcu; + rcuref_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; @@ -131,9 +134,15 @@ void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); -struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); +static inline struct ucounts * __must_check get_ucounts(struct ucounts *ucounts) +{ + if (rcuref_get(&ucounts->count)) + return ucounts; + return NULL; +} + static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 825487fb66fa..3b570b765b75 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -5,6 +5,21 @@ #include <linux/math.h> /** + * for_each_if - helper for handling conditionals in 
various for_each macros + * @condition: The condition to check + * + * Typical use:: + * + * #define for_each_foo_bar(x, y) \' + * list_for_each_entry(x, y->list, head) \' + * for_each_if(x->something == SOMETHING) + * + * The for_each_if() macro makes the use of for_each_foo_bar() less error + * prone. + */ +#define for_each_if(condition) if (!(condition)) {} else + +/** * find_closest - locate the closest element in a sorted array * @x: The reference value. * @a: The array in which to look for the closest element. Must be sorted diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h new file mode 100644 index 000000000000..a91fa24b06e0 --- /dev/null +++ b/include/linux/vdso_datastore.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VDSO_DATASTORE_H +#define _LINUX_VDSO_DATASTORE_H + +#include <linux/mm_types.h> + +extern const struct vm_special_mapping vdso_vvar_mapping; +struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr); + +#endif /* _LINUX_VDSO_DATASTORE_H */ diff --git a/include/linux/verification.h b/include/linux/verification.h index cb2d47f28091..4f3022d081c3 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -38,8 +38,6 @@ enum key_being_used_for { VERIFYING_UNSPECIFIED_SIGNATURE, NR__KEY_BEING_USED_FOR }; -extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; - #ifdef CONFIG_SYSTEM_DATA_VERIFICATION struct key; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 000a6cab2d31..707b00772ce1 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -67,6 +67,7 @@ struct vfio_device { struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct ida pasids; u8 iommufd_attached:1; #endif u8 cdev_opened:1; @@ -91,6 +92,8 @@ struct vfio_device { * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not * called. 
* @detach_ioas: Opposite of attach_ioas + * @pasid_attach_ioas: The pasid variation of attach_ioas + * @pasid_detach_ioas: Opposite of pasid_attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -115,6 +118,9 @@ struct vfio_device_ops { void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); void (*detach_ioas)(struct vfio_device *vdev); + int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, + u32 *pt_id); + void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -139,6 +145,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id); +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -166,6 +176,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #define vfio_iommufd_physical_detach_ioas \ ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_pasid_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_pasid_detach_ioas \ + ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) diff --git a/include/linux/vfsdebug.h b/include/linux/vfsdebug.h new file mode 100644 index 000000000000..9cf22d3eb9dd --- /dev/null +++ b/include/linux/vfsdebug.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VFS_DEBUG_H +#define LINUX_VFS_DEBUG_H 1 + +#include <linux/bug.h> + +struct inode; + +#ifdef CONFIG_DEBUG_VFS +void dump_inode(struct inode *inode, const char *reason); + +#define VFS_BUG_ON(cond) BUG_ON(cond) +#define VFS_WARN_ON(cond) (void)WARN_ON(cond) +#define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) +#define VFS_WARN(cond, format...) (void)WARN(cond, format) + +#define VFS_BUG_ON_INODE(cond, inode) ({ \ + if (unlikely(!!(cond))) { \ + dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")");\ + BUG_ON(1); \ + } \ +}) + +#define VFS_WARN_ON_INODE(cond, inode) ({ \ + int __ret_warn = !!(cond); \ + \ + if (unlikely(__ret_warn)) { \ + dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")");\ + WARN_ON(1); \ + } \ + unlikely(__ret_warn); \ +}) +#else +#define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN(cond, format...) 
BUILD_BUG_ON_INVALID(cond) + +#define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond) +#define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond) +#endif /* CONFIG_DEBUG_VFS */ + +#endif diff --git a/include/linux/virtio.h b/include/linux/virtio.h index dd88682e27e3..64cb4b04be7a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -190,6 +190,8 @@ int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); #endif void virtio_reset_device(struct virtio_device *dev); +int virtio_device_reset_prepare(struct virtio_device *dev); +int virtio_device_reset_done(struct virtio_device *dev); size_t virtio_max_dma_size(const struct virtio_device *vdev); @@ -214,6 +216,12 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * changes; may be called in interrupt context. * @freeze: optional function to call during suspend/hibernation. * @restore: optional function to call on resume. + * @reset_prepare: optional function to call when a transport specific reset + * occurs. + * @reset_done: optional function to call after transport specific reset + * operation has finished. + * @shutdown: synchronize with the device on shutdown. If provided, replaces + * the virtio core implementation. */ struct virtio_driver { struct device_driver driver; @@ -229,6 +237,9 @@ struct virtio_driver { void (*config_changed)(struct virtio_device *dev); int (*freeze)(struct virtio_device *dev); int (*restore)(struct virtio_device *dev); + int (*reset_prepare)(struct virtio_device *dev); + int (*reset_done)(struct virtio_device *dev); + void (*shutdown)(struct virtio_device *dev); }; #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0387d64e2c66..36fb3edfa403 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -140,6 +140,7 @@ struct virtio_vsock_sock { u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; + u32 buf_used; struct sk_buff_head rx_queue; u32 msg_count; }; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d0958095c..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,9 +41,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, PGSCAN_DIRECT_THROTTLE, PGSCAN_ANON, PGSCAN_FILE, @@ -151,6 +153,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, + DIRECT_MAP_LEVEL2_COLLAPSE, + DIRECT_MAP_LEVEL3_COLLAPSE, #endif #ifdef CONFIG_PER_VMA_LOCK_STATS VMA_LOCK_SUCCESS, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..5ca8d4dd149d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h index e1dec1a6a749..37e003ae5262 100644 --- a/include/linux/vmcore_info.h +++ b/include/linux/vmcore_info.h @@ -6,9 +6,8 @@ #include <linux/elfcore.h> #include <linux/elf.h> -#define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define 
CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(NN_PRSTATUS), 4) #define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) /* diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9f3a04345b86..b2ccb6845595 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -10,15 +10,8 @@ #include <linux/static_key.h> #include <linux/mmdebug.h> -extern int sysctl_stat_interval; - #ifdef CONFIG_NUMA -#define ENABLE_NUMA_STAT 1 -#define DISABLE_NUMA_STAT 0 -extern int sysctl_vm_numa_stat; DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); -int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); #endif struct reclaim_stat { @@ -304,10 +297,6 @@ void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); -struct ctl_table; -int vmstat_refresh(const struct ctl_table *, int write, void *buffer, size_t *lenp, - loff_t *ppos); - void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); int calculate_pressure_threshold(struct zone *zone); @@ -515,7 +504,7 @@ static inline const char *node_stat_name(enum node_stat_item item) static inline const char *lru_list_name(enum lru_list lru) { - return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_" + return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 6fb663b36f72..60c9eacd2cf3 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -431,11 +431,11 @@ enum { ((((_p)[0] & 0xFF) << 24) | (((_p)[1] & 0xFF) << 16) | ((_p)[2])) /* - * The VMCI IOCTLs. We use identity code 7, as noted in ioctl-number.h, and - * we start at sequence 9f. This gives us the same values that our shipping - * products use, starting at 1951, provided we leave out the direction and - * structure size. Note that VMMon occupies the block following us, starting - * at 2001. + * The VMCI IOCTLs. We use identity code 7, as noted in ioctl-number.rst, + * and we start at sequence 9f. This gives us the same values that our + * shipping products use, starting at 1951, provided we leave out the + * direction and structure size. Note that VMMon occupies the block + * following us, starting at 2001. 
*/ #define IOCTL_VMCI_VERSION _IO(7, 0x9f) /* 1951 */ #define IOCTL_VMCI_INIT_CONTEXT _IO(7, 0xa0) @@ -453,9 +453,7 @@ enum { #define IOCTL_VMCI_CTX_GET_CPT_STATE _IO(7, 0xb1) #define IOCTL_VMCI_CTX_SET_CPT_STATE _IO(7, 0xb2) #define IOCTL_VMCI_GET_CONTEXT_ID _IO(7, 0xb3) -#define IOCTL_VMCI_SOCKETS_VERSION _IO(7, 0xb4) -#define IOCTL_VMCI_SOCKETS_GET_AF_VALUE _IO(7, 0xb8) -#define IOCTL_VMCI_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) +/*IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)*/ #define IOCTL_VMCI_SET_NOTIFY _IO(7, 0xcb) /* 1995 */ /*IOCTL_VMMON_START _IO(7, 0xd1)*/ /* 2001 */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 6d90ad974408..965a19809c7e 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -316,6 +316,9 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); } \ \ cmd; \ + \ + if (condition) \ + break; \ } \ finish_wait(&wq_head, &__wq_entry); \ __out: __ret; \ @@ -1207,14 +1210,16 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) -#define init_wait(wait) \ +#define init_wait_func(wait, function) \ do { \ (wait)->private = current; \ - (wait)->func = autoremove_wake_function; \ + (wait)->func = function; \ INIT_LIST_HEAD(&(wait)->entry); \ (wait)->flags = 0; \ } while (0) +#define init_wait(wait) init_wait_func(wait, autoremove_wake_function) + typedef int (*task_call_f)(struct task_struct *p, void *arg); extern int task_call_func(struct task_struct *p, task_call_f func, void *arg); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d11b903c2edb..eda4b62511f7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -313,6 +313,30 @@ static inline void cgroup_writeback_umount(struct super_block *sb) /* * mm/page-writeback.c */ +/* consolidated parameters for balance_dirty_pages() and its subroutines */ +struct dirty_throttle_control { +#ifdef CONFIG_CGROUP_WRITEBACK + struct wb_domain *dom; + struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */ +#endif + struct bdi_writeback *wb; + struct fprop_local_percpu *wb_completions; + + unsigned long avail; /* dirtyable */ + unsigned long dirty; /* file_dirty + write + nfs */ + unsigned long thresh; /* dirty threshold */ + unsigned long bg_thresh; /* dirty background threshold */ + unsigned long limit; /* hard dirty limit */ + + unsigned long wb_dirty; /* per-wb counterparts */ + unsigned long wb_thresh; + unsigned long wb_bg_thresh; + + unsigned long pos_ratio; + bool freerun; + bool dirty_exceeded; +}; + void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_timer_fn(struct timer_list *t); @@ -327,12 +351,8 @@ extern struct wb_domain global_wb_domain; /* These are exported to sysctl. 
*/ extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; -extern unsigned int dirtytime_expire_interval; extern int laptop_mode; -int dirtytime_interval_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); - void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); unsigned long cgwb_calc_thresh(struct bdi_writeback *wb); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 79c781875c09..a4d6cc0c9f68 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -97,7 +97,7 @@ struct wwan_port_caps { * * This function must be balanced with a call to wwan_remove_port(). * - * Returns a valid pointer to wwan_port on success or PTR_ERR on failure + * Returns: a valid pointer to wwan_port on success or PTR_ERR on failure */ struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0b618ec04115..78eede109b1a 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1555,6 +1555,8 @@ int xa_get_order(struct xarray *, unsigned long index); int xas_get_order(struct xa_state *xas); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); +void xas_try_split(struct xa_state *xas, void *entry, unsigned int order); +unsigned int xas_try_split_min_order(unsigned int order); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { @@ -1576,6 +1578,17 @@ static inline void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { } + +static inline void xas_try_split(struct xa_state *xas, void *entry, + unsigned int order) +{ +} + +static inline unsigned int xas_try_split_min_order(unsigned int order) +{ + return 0; +} + #endif /** diff --git a/include/linux/zpool.h b/include/linux/zpool.h index a67d62b79698..52f30e526607 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -4,9 +4,8 @@ * * Copyright (C) 2014 Dan Streetman * - * This is a common frontend for the zbud and zsmalloc memory - * storage pool implementations. Typically, this is used to - * store compressed memory. + * This is a common frontend for the zswap compressed memory storage + * implementations. */ #ifndef _ZPOOL_H_ @@ -14,25 +13,6 @@ struct zpool; -/* - * Control how a handle is mapped. It will be ignored if the - * implementation does not support it. Its use is optional. - * Note that this does not refer to memory protection, it - * refers to how the memory will be copied in/out if copying - * is necessary during mapping; read-write is the safest as - * it copies the existing memory in on map, and copies the - * changed memory back out on unmap. Write-only does not copy - * in the memory and should only be used for initialization. - * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. 
- */ -enum zpool_mapmode { - ZPOOL_MM_RW, /* normal read-write mapping */ - ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ - ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ - - ZPOOL_MM_DEFAULT = ZPOOL_MM_RW -}; - bool zpool_has_pool(char *type); struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp); @@ -41,17 +21,19 @@ const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); -bool zpool_malloc_support_movable(struct zpool *pool); - int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, unsigned long *handle); void zpool_free(struct zpool *pool, unsigned long handle); -void *zpool_map_handle(struct zpool *pool, unsigned long handle, - enum zpool_mapmode mm); +void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, + void *local_copy); + +void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, + void *handle_mem); -void zpool_unmap_handle(struct zpool *pool, unsigned long handle); +void zpool_obj_write(struct zpool *zpool, unsigned long handle, + void *handle_mem, size_t mem_len); u64 zpool_get_total_pages(struct zpool *pool); @@ -81,15 +63,16 @@ struct zpool_driver { void *(*create)(const char *name, gfp_t gfp); void (*destroy)(void *pool); - bool malloc_support_movable; int (*malloc)(void *pool, size_t size, gfp_t gfp, unsigned long *handle); void (*free)(void *pool, unsigned long handle); - bool sleep_mapped; - void *(*map)(void *pool, unsigned long handle, - enum zpool_mapmode mm); - void (*unmap)(void *pool, unsigned long handle); + void *(*obj_read_begin)(void *pool, unsigned long handle, + void *local_copy); + void (*obj_read_end)(void *pool, unsigned long handle, + void *handle_mem); + void (*obj_write)(void *pool, unsigned long handle, + void *handle_mem, size_t mem_len); u64 (*total_pages)(void *pool); }; diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index a48cd0ffe57d..c26baf9fb331 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -16,23 +16,6 @@ #include <linux/types.h> -/* - * zsmalloc mapping modes - * - * NOTE: These only make a difference when a mapped object spans pages. - */ -enum zs_mapmode { - ZS_MM_RW, /* normal read-write mapping */ - ZS_MM_RO, /* read-only (no copy-out at unmap time) */ - ZS_MM_WO /* write-only (no copy-in at map time) */ - /* - * NOTE: ZS_MM_WO should only be used for initializing new - * (uninitialized) allocations. Partial writes to already - * initialized allocations should use ZS_MM_RW to preserve the - * existing data. 
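The zpool hunk above replaces the map/unmap interface with explicit object read/write entry points. A rough sketch of how a caller such as zswap might use them; example_store()/example_load() are illustrative names, and the exact contract of the local_copy bounce buffer is an assumption rather than something the header spells out:

#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/zpool.h>

/* store: allocate an object and copy the compressed buffer into it */
static int example_store(struct zpool *pool, const void *src, size_t len,
			 unsigned long *handle)
{
	int err = zpool_malloc(pool, len, GFP_KERNEL, handle);

	if (err)
		return err;
	zpool_obj_write(pool, *handle, (void *)src, len);
	return 0;
}

/*
 * load: read the object back; local_copy is a caller-provided bounce
 * buffer (at least len bytes) for backends that cannot hand out a
 * direct pointer -- zpool_obj_read_begin() may return either.
 */
static void example_load(struct zpool *pool, unsigned long handle,
			 void *local_copy, void *dst, size_t len)
{
	void *obj = zpool_obj_read_begin(pool, handle, local_copy);

	memcpy(dst, obj, len);
	zpool_obj_read_end(pool, handle, obj);
}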
- */ -}; - struct zs_pool_stats { /* How many pages were migrated (freed) */ atomic_long_t pages_compacted; @@ -48,14 +31,18 @@ void zs_free(struct zs_pool *pool, unsigned long obj); size_t zs_huge_class_size(struct zs_pool *pool); -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm); -void zs_unmap_object(struct zs_pool *pool, unsigned long handle); - unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size); void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); + +void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle, + void *local_copy); +void zs_obj_read_end(struct zs_pool *pool, unsigned long handle, + void *handle_mem); +void zs_obj_write(struct zs_pool *pool, unsigned long handle, + void *handle_mem, size_t mem_len); + #endif diff --git a/include/linux/zstd.h b/include/linux/zstd.h index b2c7cf310c8f..2f2a3c8b8a33 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -160,7 +160,6 @@ typedef ZSTD_parameters zstd_parameters; zstd_parameters zstd_get_params(int level, unsigned long long estimated_src_size); - /** * zstd_get_cparams() - returns zstd_compression_parameters for selected level * @level: The compression level @@ -173,9 +172,20 @@ zstd_parameters zstd_get_params(int level, zstd_compression_parameters zstd_get_cparams(int level, unsigned long long estimated_src_size, size_t dict_size); -/* ====== Single-pass Compression ====== */ - typedef ZSTD_CCtx zstd_cctx; +typedef ZSTD_cParameter zstd_cparameter; + +/** + * zstd_cctx_set_param() - sets a compression parameter + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @param: The parameter to set. + * @value: The value to set the parameter to. + * + * Return: Zero or an error, which can be checked using zstd_is_error(). + */ +size_t zstd_cctx_set_param(zstd_cctx *cctx, zstd_cparameter param, int value); + +/* ====== Single-pass Compression ====== */ /** * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx @@ -191,6 +201,20 @@ typedef ZSTD_CCtx zstd_cctx; size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); /** + * zstd_cctx_workspace_bound_with_ext_seq_prod() - max memory needed to + * initialize a zstd_cctx when using the block-level external sequence + * producer API. + * @parameters: The compression parameters to be used. + * + * If multiple compression parameters might be used, the caller must call + * this function for each set of parameters and use the maximum size. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). + */ +size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *parameters); + +/** * zstd_init_cctx() - initialize a zstd compression context * @workspace: The workspace to emplace the context into. It must outlive * the returned context. 
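The zstd.h hunk above adds zstd_cctx_set_param() alongside the existing context setup helpers. A hedged sketch of wiring it into the usual workspace-based context creation; example_make_cctx() is hypothetical, the zstd_init_cctx(workspace, size) signature is assumed from the existing kernel API, and the checksum parameter is just an arbitrary example value:

#include <linux/vmalloc.h>
#include <linux/zstd.h>

/* hypothetical helper: build a cctx in a vmalloc'ed workspace */
static zstd_cctx *example_make_cctx(void **wksp_out)
{
	zstd_parameters params = zstd_get_params(3, 0);
	size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
	void *wksp = vmalloc(wksp_size);
	zstd_cctx *cctx;

	if (!wksp)
		return NULL;
	cctx = zstd_init_cctx(wksp, wksp_size);
	if (!cctx) {
		vfree(wksp);
		return NULL;
	}
	/* new in this patch: adjust individual parameters on the context */
	zstd_cctx_set_param(cctx, ZSTD_c_checksumFlag, 1);
	*wksp_out = wksp;
	return cctx;
}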
@@ -425,6 +449,16 @@ typedef ZSTD_CStream zstd_cstream; size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); /** + * zstd_cstream_workspace_bound_with_ext_seq_prod() - memory needed to initialize + * a zstd_cstream when using the block-level external sequence producer API. + * @cparams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cstream(). + */ +size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *cparams); + +/** * zstd_init_cstream() - initialize a zstd streaming compression context * @parameters The zstd parameters to use for compression. * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller @@ -584,6 +618,18 @@ size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); /** + * zstd_register_sequence_producer() - exposes the zstd library function + * ZSTD_registerSequenceProducer(). This is used for the block-level external + * sequence producer API. See upstream zstd.h for detailed documentation. + */ +typedef ZSTD_sequenceProducer_F zstd_sequence_producer_f; +void zstd_register_sequence_producer( + zstd_cctx *cctx, + void* sequence_producer_state, + zstd_sequence_producer_f sequence_producer +); + +/** * struct zstd_frame_params - zstd frame parameters stored in the frame header * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not * present. @@ -596,7 +642,7 @@ size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); * * See zstd_lib.h. */ -typedef ZSTD_frameHeader zstd_frame_header; +typedef ZSTD_FrameHeader zstd_frame_header; /** * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame @@ -611,4 +657,35 @@ typedef ZSTD_frameHeader zstd_frame_header; size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, size_t src_size); +/** + * struct zstd_sequence - a sequence of literals or a match + * + * @offset: The offset of the match + * @litLength: The literal length of the sequence + * @matchLength: The match length of the sequence + * @rep: Represents which repeat offset is used + */ +typedef ZSTD_Sequence zstd_sequence; + +/** + * zstd_compress_sequences_and_literals() - compress an array of zstd_sequence and literals + * + * @cctx: The zstd compression context. + * @dst: The buffer to compress the data into. + * @dst_capacity: The size of the destination buffer. + * @in_seqs: The array of zstd_sequence to compress. + * @in_seqs_size: The number of sequences in in_seqs. + * @literals: The literals associated to the sequences to be compressed. + * @lit_size: The size of the literals in the literals buffer. + * @lit_capacity: The size of the literals buffer. + * @decompressed_size: The size of the input data + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). 
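The kerneldoc above describes the new zstd_compress_sequences_and_literals() entry point (its prototype follows just below). A thin illustrative wrapper showing the call and the zstd_is_error() check; example_compress_seqs() is a made-up name and the -EINVAL mapping is arbitrary:

#include <linux/errno.h>
#include <linux/zstd.h>

static int example_compress_seqs(zstd_cctx *cctx, void *dst, size_t dst_capacity,
				 const zstd_sequence *seqs, size_t nr_seqs,
				 const void *lits, size_t lit_size,
				 size_t lit_capacity, size_t decompressed_size,
				 size_t *out_len)
{
	size_t ret = zstd_compress_sequences_and_literals(cctx, dst, dst_capacity,
							  seqs, nr_seqs, lits,
							  lit_size, lit_capacity,
							  decompressed_size);

	if (zstd_is_error(ret))
		return -EINVAL;
	*out_len = ret;
	return 0;
}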
+ */ +size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity, + const zstd_sequence *in_seqs, size_t in_seqs_size, + const void* literals, size_t lit_size, size_t lit_capacity, + size_t decompressed_size); + #endif /* LINUX_ZSTD_H */ diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h index 58b6dd45a969..c307fb011132 100644 --- a/include/linux/zstd_errors.h +++ b/include/linux/zstd_errors.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -12,13 +13,18 @@ #define ZSTD_ERRORS_H_398273423 -/*===== dependency =====*/ -#include <linux/types.h> /* size_t */ +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBLE +#ifndef ZSTDERRORLIB_HIDDEN +# if (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else +# define ZSTDERRORLIB_HIDDEN +# endif +#endif -/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -#define ZSTDERRORLIB_VISIBILITY -#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE /*-********************************************* * Error codes list @@ -43,14 +49,18 @@ typedef enum { ZSTD_error_frameParameter_windowTooLarge = 16, ZSTD_error_corruption_detected = 20, ZSTD_error_checksum_wrong = 22, + ZSTD_error_literals_headerWrong = 24, ZSTD_error_dictionary_corrupted = 30, ZSTD_error_dictionary_wrong = 32, ZSTD_error_dictionaryCreation_failed = 34, ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_combination_unsupported = 41, ZSTD_error_parameter_outOfBound = 42, ZSTD_error_tableLog_tooLarge = 44, ZSTD_error_maxSymbolValue_tooLarge = 46, ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_cannotProduce_uncompressedBlock = 49, + ZSTD_error_stabilityCondition_notRespected = 50, ZSTD_error_stage_wrong = 60, ZSTD_error_init_missing = 62, ZSTD_error_memory_allocation = 64, @@ -58,18 +68,18 @@ typedef enum { ZSTD_error_dstSize_tooSmall = 70, ZSTD_error_srcSize_wrong = 72, ZSTD_error_dstBuffer_null = 74, + ZSTD_error_noForwardProgress_destFull = 80, + ZSTD_error_noForwardProgress_inputEmpty = 82, /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_sequenceProducer_failed = 106, + ZSTD_error_externalSequences_invalid = 107, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; -/*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare with enum list published above */ -ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index 79d55465d5c1..e295d4125dde 100644 --- a/include/linux/zstd_lib.h +++ b/include/linux/zstd_lib.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. 
+ * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -11,23 +12,47 @@ #ifndef ZSTD_H_235446 #define ZSTD_H_235446 -/* ====== Dependency ======*/ -#include <linux/limits.h> /* INT_MAX */ + +/* ====== Dependencies ======*/ #include <linux/types.h> /* size_t */ +#include <linux/zstd_errors.h> /* list of errors */ +#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#include <linux/limits.h> /* INT_MAX */ +#endif /* ZSTD_STATIC_LINKING_ONLY */ + /* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBLE +#define ZSTDLIB_VISIBLE + +#ifndef ZSTDLIB_HIDDEN # if (__GNUC__ >= 4) && !defined(__MINGW32__) -# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) # define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) # else -# define ZSTDLIB_VISIBLE # define ZSTDLIB_HIDDEN # endif #endif + #define ZSTDLIB_API ZSTDLIB_VISIBLE +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. + */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__) +# define ZSTD_DEPRECATED(message) __attribute__((deprecated(message))) +# elif (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ + /* ***************************************************************************** Introduction @@ -65,7 +90,7 @@ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 5 -#define ZSTD_VERSION_RELEASE 2 +#define ZSTD_VERSION_RELEASE 7 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! ZSTD_versionNumber() : @@ -103,11 +128,12 @@ ZSTDLIB_API const char* ZSTD_versionString(void); /* ************************************* -* Simple API +* Simple Core API ***************************************/ /*! ZSTD_compress() : * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have + * enough space to successfully compress the data. * @return : compressed size written into `dst` (<= `dstCapacity), * or an error code if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, @@ -115,47 +141,55 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, int compressionLevel); /*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). 
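Following the note above that dstCapacity >= ZSTD_compressBound(srcSize) guarantees enough room, a minimal one-shot sketch using the declarations from this header; example_compress_one_shot() is hypothetical and level 3 is an arbitrary choice:

/* one-shot compression sized with ZSTD_compressBound(), per the note above */
static size_t example_compress_one_shot(void *dst, size_t dst_capacity,
					const void *src, size_t src_size)
{
	size_t bound = ZSTD_compressBound(src_size);
	size_t written;

	if (ZSTD_isError(bound) || dst_capacity < bound)
		return 0;	/* caller should supply compressBound() bytes */
	written = ZSTD_compress(dst, dst_capacity, src, src_size, 3);
	return ZSTD_isError(written) ? 0 : written;
}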
*/ + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * Multiple compressed frames can be decompressed at once with this method. + * The result will be the concatenation of all decompressed frames, back to back. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * First frame's decompressed size can be extracted using ZSTD_getFrameContentSize(). + * If maximum upper bound isn't known, prefer using streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); + +/*====== Decompression helper functions ======*/ + /*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). - * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * When invoking this method on a skippable frame, it will return 0. + * note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode). + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. 
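A small sketch of acting on the three possible ZSTD_getFrameContentSize() outcomes listed above; example_dst_size() is a made-up helper and collapses "unknown" and "error" to 0 purely for brevity:

/* pick a destination size from the frame header, per the notes above */
static unsigned long long example_dst_size(const void *src, size_t src_size)
{
	unsigned long long r = ZSTD_getFrameContentSize(src, src_size);

	if (r == ZSTD_CONTENTSIZE_ERROR)
		return 0;	/* not a zstd frame, or the header is truncated */
	if (r == ZSTD_CONTENTSIZE_UNKNOWN)
		return 0;	/* size not recorded: fall back to streaming */
	/* note 5 above: an untrusted value must still be clamped by policy */
	return r;		/* 0 is also returned for a valid but empty frame */
}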
+ * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). +/*! ZSTD_getDecompressedSize() (obsolete): + * This function is now obsolete, in favor of ZSTD_getFrameContentSize(). * Both functions work the same way, but ZSTD_getDecompressedSize() blends * "empty", "unknown" and "error" results to the same return value (0), * while ZSTD_getFrameContentSize() gives them separate return values. * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ @@ -163,18 +197,50 @@ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t * `srcSize` must be >= first frame size * @return : the compressed size of the first frame starting at `src`, * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ + * or an error code if input is invalid + * Note 1: this method is called _find*() because it's not enough to read the header, + * it may have to scan through the frame's content, to reach its end. + * Note 2: this method also works with Skippable Frames. In which case, + * it returns the size of the complete skippable frame, + * which is always equal to its content size + 8 bytes for headers. */ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ -ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ +/*====== Compression helper functions ======*/ + +/*! 
ZSTD_compressBound() : + * maximum compressed size in worst case single-pass scenario. + * When invoking `ZSTD_compress()`, or any other one-pass compression function, + * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) + * as it eliminates one potential failure scenario, + * aka not enough room in dst buffer to write the compressed frame. + * Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE . + * In which case, ZSTD_compressBound() will return an error code + * which can be tested using ZSTD_isError(). + * + * ZSTD_COMPRESSBOUND() : + * same as ZSTD_compressBound(), but as a macro. + * It can be used to produce constants, which can be useful for static allocation, + * for example to size a static array on stack. + * Will produce constant value 0 if srcSize is too large. + */ +#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) +#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ + + +/*====== Error helper functions ======*/ +/* ZSTD_isError() : + * Most ZSTD_* functions returning a size_t value can be tested for error, + * using ZSTD_isError(). + * @return 1 if error, 0 otherwise + */ +ZSTDLIB_API unsigned ZSTD_isError(size_t result); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); /* convert a result into an error code, which can be compared to error enum list */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t result); /*!< provides readable string from a function result */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ /* ************************************* @@ -182,25 +248,25 @@ ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compres ***************************************/ /*= Compression context * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. + * it is recommended to allocate a compression context just once, + * and reuse it for each successive compression operation. + * This will make the workload easier for system's memory. * Note : re-using context is just a speed / resource optimization. * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. + * Note 2: For parallel execution in multi-threaded environments, + * use one different context per thread . */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* compatible with NULL pointer */ /*! ZSTD_compressCCtx() : * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. 
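Putting the context-reuse advice and the error helpers above together, a hedged sketch that reuses one ZSTD_CCtx across several compression levels and keys off ZSTD_getErrorCode(); example_reuse_cctx() is illustrative only:

/* reuse one context for several attempts, as recommended above */
static void example_reuse_cctx(const void *src, size_t src_size,
			       void *dst, size_t dst_capacity)
{
	ZSTD_CCtx *cctx = ZSTD_createCCtx();
	int level;

	if (cctx == NULL)
		return;
	for (level = 1; level <= ZSTD_maxCLevel(); level++) {
		size_t ret = ZSTD_compressCCtx(cctx, dst, dst_capacity,
					       src, src_size, level);

		if (ZSTD_isError(ret) &&
		    ZSTD_getErrorCode(ret) == ZSTD_error_dstSize_tooSmall)
			break;	/* destination too small for this input */
	}
	ZSTD_freeCCtx(cctx);	/* compatible with NULL, per the comment above */
}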
- * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . + * Important : in order to mirror `ZSTD_compress()` behavior, + * this function compresses at the requested compression level, + * __ignoring any other advanced parameter__ . * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. + * they will all be reset. Only @compressionLevel remains. */ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, @@ -210,7 +276,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, /*= Decompression context * When decompressing many times, * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. + * and reuse it for each successive compression operation. * This will make workload friendlier for system's memory. * Use one context per thread for parallel execution. */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; @@ -220,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer * /*! ZSTD_decompressDCtx() : * Same as ZSTD_decompress(), * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. + * Compatible with sticky parameters (see below). */ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -236,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, * using ZSTD_CCtx_set*() functions. * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * * This API supersedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. + * In the future, we expect to remove API entry points from experimental which are redundant with this API. */ @@ -324,6 +390,19 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ + + ZSTD_c_targetCBlockSize=130, /* v1.5.6+ + * Attempts to fit compressed block size into approximately targetCBlockSize. + * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. + * Note that it's not a guarantee, just a convergence target (default:0). + * No target when targetCBlockSize == 0. + * This is helpful in low bandwidth streaming environments to improve end-to-end latency, + * when a client can make use of partial documents (a prominent example being Chrome). + * Note: this parameter is stable since v1.5.6. + * It was present as an experimental parameter in earlier versions, + * but it's not recommended using it with earlier library versions + * due to massive performance regressions. + */ /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. 
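A short sketch of the sticky-parameter flow with the now-stable ZSTD_c_targetCBlockSize described above; example_low_latency_compress() is hypothetical, and 1340 is simply the ZSTD_TARGETCBLOCKSIZE_MIN value that appears later in this patch:

/* sticky-parameter flow with the now-stable target block size */
static size_t example_low_latency_compress(ZSTD_CCtx *cctx,
					   void *dst, size_t dst_capacity,
					   const void *src, size_t src_size)
{
	ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
	/* stable since v1.5.6 per the comment above; 0 would mean "no target" */
	ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1340);
	return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
}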
* This parameter is designed to improve compression ratio @@ -403,15 +482,18 @@ typedef enum { * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize * ZSTD_c_srcSizeHint * ZSTD_c_enableDedicatedDictSearch * ZSTD_c_stableInBuffer * ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences - * ZSTD_c_useBlockSplitter + * ZSTD_c_blockSplitterLevel + * ZSTD_c_splitAfterSequences * ZSTD_c_useRowMatchFinder + * ZSTD_c_prefetchCDictTables + * ZSTD_c_enableSeqProducerFallback + * ZSTD_c_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -421,7 +503,7 @@ typedef enum { ZSTD_c_experimentalParam3=1000, ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, + /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */ ZSTD_c_experimentalParam7=1004, ZSTD_c_experimentalParam8=1005, ZSTD_c_experimentalParam9=1006, @@ -430,7 +512,12 @@ typedef enum { ZSTD_c_experimentalParam12=1009, ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam14=1011, - ZSTD_c_experimentalParam15=1012 + ZSTD_c_experimentalParam15=1012, + ZSTD_c_experimentalParam16=1013, + ZSTD_c_experimentalParam17=1014, + ZSTD_c_experimentalParam18=1015, + ZSTD_c_experimentalParam19=1016, + ZSTD_c_experimentalParam20=1017 } ZSTD_cParameter; typedef struct { @@ -493,7 +580,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. + * This also removes any reference to any dictionary or external sequence producer. * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -502,11 +589,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); /*! ZSTD_compress2() : * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * (note that this entry point doesn't even expose a compression level parameter). * ZSTD_compress2() always starts a new frame. * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have + * enough space to successfully compress the data, though it is possible it fails for other reasons. * @return : compressed size written into `dst` (<= `dstCapacity), * or an error code if it fails (which can be tested using ZSTD_isError()). */ @@ -543,13 +632,17 @@ typedef enum { * ZSTD_d_stableOutBuffer * ZSTD_d_forceIgnoreChecksum * ZSTD_d_refMultipleDDicts + * ZSTD_d_disableHuffmanAssembly + * ZSTD_d_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? 
names directly */ ZSTD_d_experimentalParam1=1000, ZSTD_d_experimentalParam2=1001, ZSTD_d_experimentalParam3=1002, - ZSTD_d_experimentalParam4=1003 + ZSTD_d_experimentalParam4=1003, + ZSTD_d_experimentalParam5=1004, + ZSTD_d_experimentalParam6=1005 } ZSTD_dParameter; @@ -604,14 +697,14 @@ typedef struct ZSTD_outBuffer_s { * A ZSTD_CStream object is required to track streaming operation. * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. * * For parallel execution, use one separate ZSTD_CStream per thread. * * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. * * Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. +* it will reuse the same sticky parameters as previous compression session. * When in doubt, it's recommended to fully initialize the context before usage. * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to @@ -700,6 +793,11 @@ typedef enum { * only ZSTD_e_end or ZSTD_e_flush operations are allowed. * Before starting a new compression job, or changing compression parameters, * it is required to fully flush internal buffers. + * - note: if an operation ends with an error, it may leave @cctx in an undefined state. + * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state. + * In order to be re-employed after an error, a state must be reset, + * which can be done explicitly (ZSTD_CCtx_reset()), + * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) */ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_outBuffer* output, @@ -728,8 +826,6 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output * This following is a legacy streaming API, available since v1.0+ . * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). * It is redundant, but remains fully supported. - * Streaming in combination with advanced parameters and dictionary compression - * can only be used through the new API. ******************************************************************************/ /*! @@ -738,6 +834,9 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * + * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API + * to compress with a dictionary. */ ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); /*! @@ -758,7 +857,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); * * A ZSTD_DStream object is required to track streaming operations. * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. +* ZSTD_DStream objects can be re-employed multiple times. 
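A compact sketch of the ZSTD_compressStream2() loop described above, emitting one frame with ZSTD_e_end; example_stream_compress() is a made-up name and assumes the output buffer is large enough (ZSTD_compressBound() bytes) so the loop terminates:

/* emit one whole frame with the streaming API described above */
static size_t example_stream_compress(ZSTD_CCtx *cctx,
				      void *dst, size_t dst_capacity,
				      const void *src, size_t src_size)
{
	ZSTD_inBuffer in = { src, src_size, 0 };
	ZSTD_outBuffer out = { dst, dst_capacity, 0 };
	size_t remaining;

	do {
		remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
		if (ZSTD_isError(remaining))
			return remaining;	/* cctx should be reset before reuse */
	} while (remaining != 0);		/* 0 == frame fully written and flushed */
	return out.pos;
}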
* * Use ZSTD_initDStream() to start a new decompression operation. * @return : recommended first input size @@ -768,16 +867,21 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); * The function will update both `pos` fields. * If `input.pos < input.size`, some input has not been consumed. * It's up to the caller to present again remaining data. +* * The function tries to flush all data decoded immediately, respecting output buffer size. * If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., +* +* However, when `output.pos == output.size`, it's more difficult to know. +* If @return > 0, the frame is not complete, meaning +* either there is still some data left to flush within internal buffers, +* or there is more input to read to complete the frame (or both). * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. * @return : 0 when a frame is completely decoded and fully flushed, * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : * the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. +* that will never request more than the remaining content of the compressed frame. * *******************************************************************************/ typedef ZSTD_DCtx ZSTD_DStream; /*< DCtx and DStream are now effectively same object (>= v1.3.0) */ @@ -788,13 +892,38 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer /*===== Streaming decompression functions =====*/ -/* This function is redundant with the advanced API and equivalent to: +/*! ZSTD_initDStream() : + * Initialize/reset DStream state for new decompression operation. + * Call before new decompression operation using same DStream. * + * Note : This function is redundant with the advanced API and equivalent to: * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * ZSTD_DCtx_refDDict(zds, NULL); */ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +/*! ZSTD_decompressStream() : + * Streaming decompression function. + * Call repetitively to consume full input updating it as necessary. + * Function will update both input and output `pos` fields exposing current state via these fields: + * - `input.pos < input.size`, some input remaining and caller should provide remaining input + * on the next call. + * - `output.pos < output.size`, decoder flushed internal output buffer. + * - `output.pos == output.size`, unflushed data potentially present in the internal buffers, + * check ZSTD_decompressStream() @return value, + * if > 0, invoke it again to flush remaining data to output. + * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. + * + * @return : 0 when a frame is completely decoded and fully flushed, + * or an error code, which can be tested using ZSTD_isError(), + * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. + * + * Note: when an operation returns with an error code, the @zds state may be left in undefined state. + * It's UB to invoke `ZSTD_decompressStream()` on such a state. 
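Mirroring the flush rules spelled out above, a hedged sketch of a ZSTD_decompressStream() drain loop; example_stream_decompress() is illustrative and simply stops when the output buffer fills:

/* drain a single frame, following the flush rules described above */
static size_t example_stream_decompress(ZSTD_DStream *zds,
					void *dst, size_t dst_capacity,
					const void *src, size_t src_size)
{
	ZSTD_inBuffer in = { src, src_size, 0 };
	ZSTD_outBuffer out = { dst, dst_capacity, 0 };

	ZSTD_initDStream(zds);
	for (;;) {
		size_t ret = ZSTD_decompressStream(zds, &out, &in);

		if (ZSTD_isError(ret))
			return ret;	/* zds must be reset before it is reused */
		if (ret == 0)
			break;		/* frame completely decoded and fully flushed */
		if (out.pos == out.size)
			break;		/* output full; a real caller would drain or grow dst */
	}
	return out.pos;
}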
+ * In order to re-use such a state, it must be first reset, + * which can be done explicitly (`ZSTD_DCtx_reset()`), + * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) + */ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ @@ -913,7 +1042,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); * If @return == 0, the dictID could not be decoded. * This could for one of the following reasons : * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information. * Note : this use case also happens when using a non-conformant dictionary. * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). * - This is not a Zstandard frame. @@ -925,9 +1054,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * Advanced dictionary and prefix API (Requires v1.4.0+) * * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). + * Dictionaries are sticky, they remain valid when same context is reused, + * they only reset when the context is reset + * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. + * In contrast, Prefixes are single-use. ******************************************************************************/ @@ -937,8 +1068,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, + * until parameters are reset, a new dictionary is loaded, or the dictionary + * is explicitly invalidated by loading a NULL dictionary. * Note 2 : Loading a dictionary involves building tables. * It's also a CPU consuming operation, with non-negligible impact on latency. * Tables are dependent on compression parameters, and for this reason, @@ -947,11 +1079,15 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. * In such a case, dictionary buffer must outlive its users. * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ + * to precisely select how dictionary content must be interpreted. + * Note 5 : This method does not benefit from LDM (long distance mode). 
+ * If you want to employ LDM on some large dictionary content, + * prefer employing ZSTD_CCtx_refPrefix() described below. + */ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ - * Reference a prepared dictionary, to be used for all next compressed frames. + * Reference a prepared dictionary, to be used for all future compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. @@ -970,6 +1106,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); * Decompression will need same prefix to properly regenerate data. * Compressing with a prefix is similar in outcome as performing a diff and compressing it, * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * This method is compatible with LDM (long distance mode). * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary * Note 1 : Prefix buffer is referenced. It **must** outlive compression. @@ -986,9 +1123,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. + * Create an internal DDict from dict buffer, to be used to decompress all future frames. + * The dictionary remains valid for all future frames, until explicitly invalidated, or + * a new dictionary is loaded. * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, * meaning "return to no-dictionary mode". @@ -1012,9 +1149,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s * The memory for the table is allocated on the first call to refDDict, and can be * freed with ZSTD_freeDCtx(). * + * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary + * will be managed, and referencing a dictionary effectively "discards" any previous one. + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. * Special: referencing a NULL DDict means "return to no-dictionary mode". * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. */ @@ -1051,6 +1189,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + #endif /* ZSTD_H_235446 */ @@ -1066,29 +1205,12 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + /* This can be overridden externally to hide static symbols. 
*/ #ifndef ZSTDLIB_STATIC_API #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE #endif -/* Deprecation warnings : - * Should these warnings be a problem, it is generally possible to disable them, - * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. - * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. - */ -#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ -#else -# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) -# elif (__GNUC__ >= 3) -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) -# else -# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API -# endif -#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ - /* ************************************************************************************** * experimental API (static linking only) **************************************************************************************** @@ -1123,6 +1245,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ #define ZSTD_STRATEGY_MIN ZSTD_fast #define ZSTD_STRATEGY_MAX ZSTD_btultra2 +#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */ #define ZSTD_OVERLAPLOG_MIN 0 @@ -1146,7 +1269,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) /* Advanced parameter bounds */ -#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MAX INT_MAX @@ -1188,7 +1311,7 @@ typedef struct { * * Note: This field is optional. ZSTD_generateSequences() will calculate the value of * 'rep', but repeat offsets do not necessarily need to be calculated from an external - * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * sequence provider perspective. For example, ZSTD_compressSequences() does not * use this 'rep' field at all (as of now). */ } ZSTD_Sequence; @@ -1293,17 +1416,18 @@ typedef enum { } ZSTD_literalCompressionMode_e; typedef enum { - /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final - * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable - * or ZSTD_ps_disable allow for a force enable/disable the feature. + /* Note: This enum controls features which are conditionally beneficial. + * Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto), + * but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature. 
*/ ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */ ZSTD_ps_enable = 1, /* Force-enable the feature */ ZSTD_ps_disable = 2 /* Do not use the feature */ -} ZSTD_paramSwitch_e; +} ZSTD_ParamSwitch_e; +#define ZSTD_paramSwitch_e ZSTD_ParamSwitch_e /* old name */ /* ************************************* -* Frame size functions +* Frame header and size functions ***************************************/ /*! ZSTD_findDecompressedSize() : @@ -1345,34 +1469,130 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); /*! ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * srcSize must be large enough, aka >= ZSTD_FRAMEHEADERSIZE_PREFIX. * @return : size of the Frame Header, * or an error code (if srcSize is too small) */ ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e; +#define ZSTD_frameType_e ZSTD_FrameType_e /* old name */ +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_FrameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; /* for ZSTD_skippableFrame, contains the skippable magic variant [0-15] */ + unsigned checksumFlag; + unsigned _reserved1; + unsigned _reserved2; +} ZSTD_FrameHeader; +#define ZSTD_frameHeader ZSTD_FrameHeader /* old name */ + +/*! ZSTD_getFrameHeader() : + * decode Frame Header into `zfhPtr`, or requires larger `srcSize`. + * @return : 0 => header is complete, `zfhPtr` is correctly filled, + * >0 => `srcSize` is too small, @return value is the wanted `srcSize` amount, `zfhPtr` is not filled, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize); +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); + +/*! ZSTD_decompressionMargin() : + * Zstd supports in-place decompression, where the input and output buffers overlap. + * In this case, the output buffer must be at least (Margin + Output_Size) bytes large, + * and the input buffer must be at the end of the output buffer. + * + * _______________________ Output Buffer ________________________ + * | | + * | ____ Input Buffer ____| + * | | | + * v v v + * |---------------------------------------|-----------|----------| + * ^ ^ ^ + * |___________________ Output_Size ___________________|_ Margin _| + * + * NOTE: See also ZSTD_DECOMPRESSION_MARGIN(). + * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or + * ZSTD_decompressDCtx(). + * NOTE: This function supports multi-frame input. + * + * @param src The compressed frame(s) + * @param srcSize The size of the compressed frame(s) + * @returns The decompression margin or an error that can be checked with ZSTD_isError(). 
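A rough sketch of the in-place layout described by the diagram above: the caller parks the compressed frame at the end of the buffer and decompresses toward the front. example_in_place() is hypothetical, and sizing the buffer as content + margin is an assumption based on the comment rather than something the header states explicitly:

/* decompress in place: frame already placed at the end of buf */
static size_t example_in_place(void *buf, size_t buf_size, size_t frame_size)
{
	const char *frame = (const char *)buf + buf_size - frame_size;
	unsigned long long content = ZSTD_getFrameContentSize(frame, frame_size);
	size_t margin = ZSTD_decompressionMargin(frame, frame_size);

	if (ZSTD_isError(margin) ||
	    content == ZSTD_CONTENTSIZE_UNKNOWN || content == ZSTD_CONTENTSIZE_ERROR)
		return 0;
	if (buf_size < content + margin)
		return 0;	/* output buffer must hold content + margin bytes */
	return ZSTD_decompress(buf, (size_t)content, frame, frame_size);
}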
+ */ +ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize); + +/*! ZSTD_DECOMPRESS_MARGIN() : + * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from + * the compressed frame, compute it from the original size and the blockSizeLog. + * See ZSTD_decompressionMargin() for details. + * + * WARNING: This macro does not support multi-frame input, the input must be a single + * zstd frame. If you need that support use the function, or implement it yourself. + * + * @param originalSize The original uncompressed size of the data. + * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX). + * Unless you explicitly set the windowLog smaller than + * ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX. + */ +#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \ + ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + \ + 4 /* checksum */ + \ + ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \ + (blockSize) /* One block of margin */ \ + )) + typedef enum { - ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ - ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ -} ZSTD_sequenceFormat_e; + ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */ + ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */ +} ZSTD_SequenceFormat_e; +#define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */ + +/*! ZSTD_sequenceBound() : + * `srcSize` : size of the input buffer + * @return : upper-bound for the number of sequences that can be generated + * from a buffer of srcSize bytes + * + * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). + */ +ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); /*! ZSTD_generateSequences() : - * Generate sequences using ZSTD_compress2, given a source buffer. + * WARNING: This function is meant for debugging and informational purposes ONLY! + * Its implementation is flawed, and it will be deleted in a future version. + * It is not guaranteed to succeed, as there are several cases where it will give + * up and fail. You should NOT use this function in production code. + * + * This function is deprecated, and will be removed in a future version. + * + * Generate sequences using ZSTD_compress2(), given a source buffer. + * + * @param zc The compression context to be used for ZSTD_compress2(). Set any + * compression parameters you need on this context. + * @param outSeqs The output sequences buffer of size @p outSeqsSize + * @param outSeqsCapacity The size of the output sequences buffer. + * ZSTD_sequenceBound(srcSize) is an upper bound on the number + * of sequences that can be generated. + * @param src The source buffer to generate sequences from of size @p srcSize. + * @param srcSize The size of the source buffer. * * Each block will end with a dummy sequence * with offset == 0, matchLength == 0, and litLength == length of last literals. * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) * simply acts as a block delimiter. * - * zc can be used to insert custom compression params. 
- * This function invokes ZSTD_compress2 - * - * The output of this function can be fed into ZSTD_compressSequences() with CCtx - * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters - * @return : number of sequences generated + * @returns The number of sequences generated, necessarily less than + * ZSTD_sequenceBound(srcSize), or an error code that can be checked + * with ZSTD_isError(). */ - -ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") +ZSTDLIB_STATIC_API size_t +ZSTD_generateSequences(ZSTD_CCtx* zc, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize); /*! ZSTD_mergeBlockDelimiters() : * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals @@ -1388,8 +1608,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* o ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); /*! ZSTD_compressSequences() : - * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. - * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. + * @src contains the entire input (not just the literals). + * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals + * If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.). * The entire source is compressed into a single frame. * * The compression behavior changes based on cctx params. In particular: @@ -1398,11 +1620,17 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si * the block size derived from the cctx, and sequences may be split. This is the default setting. * * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain - * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes + * using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit + * can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation. + * By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10). + * ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction. * - * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined - * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for - * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined + * behavior. 
If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) and then bail out and return an error. * * In addition to the two adjustable experimental params, there are other important cctx params. * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. @@ -1410,14 +1638,42 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md * - * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. - * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, - * and cannot emit an RLE block that disagrees with the repcode history - * @return : final compressed size or a ZSTD error. - */ -ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize); + * Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused. + * Dev Note: Once ability to ingest repcodes become available, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history. + * @return : final compressed size, or a ZSTD error code. + */ +ZSTDLIB_STATIC_API size_t +ZSTD_compressSequences(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_compressSequencesAndLiterals() : + * This is a variant of ZSTD_compressSequences() which, + * instead of receiving (src,srcSize) as input parameter, receives (literals,litSize), + * aka all the literals, already extracted and laid out into a single continuous buffer. + * This can be useful if the process generating the sequences also happens to generate the buffer of literals, + * thus skipping an extraction + caching stage. + * It's a speed optimization, useful when the right conditions are met, + * but it also features the following limitations: + * - Only supports explicit delimiter mode + * - Currently does not support Sequences validation (so input Sequences are trusted) + * - Not compatible with frame checksum, which must be disabled + * - If any block is incompressible, will fail and return an error + * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. + * - @litBufCapacity is the size of the underlying buffer into which literals are written, starting at address @literals. + * @litBufCapacity must be at least 8 bytes larger than @litSize. + * - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error. + * @return : final compressed size, or a ZSTD error code. + */ +ZSTDLIB_STATIC_API size_t +ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t nbSequences, + const void* literals, size_t litSize, size_t litBufCapacity, + size_t decompressedSize); /*! 
ZSTD_writeSkippableFrame() : @@ -1425,8 +1681,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* ds * * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. - * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so - * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, + * so the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. * * Returns an error if destination buffer is not large enough, if the source size is not representable * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). @@ -1434,26 +1690,28 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* ds * @return : number of bytes written or a ZSTD error. */ ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, - const void* src, size_t srcSize, unsigned magicVariant); + const void* src, size_t srcSize, + unsigned magicVariant); /*! ZSTD_readSkippableFrame() : - * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. + * Retrieves the content of a zstd skippable frame starting at @src, and writes it to @dst buffer. * - * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, - * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested - * in the magicVariant. + * The parameter @magicVariant will receive the magicVariant that was supplied when the frame was written, + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. + * This can be NULL if the caller is not interested in the magicVariant. * * Returns an error if destination buffer is not large enough, or if the frame is not skippable. * * @return : number of bytes written or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, - const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, + unsigned* magicVariant, + const void* src, size_t srcSize); /*! ZSTD_isSkippableFrame() : * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. */ -ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); +ZSTDLIB_STATIC_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); @@ -1464,48 +1722,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); /*! ZSTD_estimate*() : * These functions make it possible to estimate memory usage * of a future {D,C}Ctx, before its creation. + * This is useful in combination with ZSTD_initStatic(), + * which makes it possible to employ a static buffer for ZSTD_CCtx* state. * * ZSTD_estimateCCtxSize() will provide a memory budget large enough - * for any compression level up to selected one. - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - * does not include space for a window buffer. - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() + * associated with any compression level up to max specified one. 
* The estimate will assume the input may be arbitrarily large, * which is the worst case. * + * Note that the size estimation is specific for one-shot compression, + * it is not valid for streaming (see ZSTD_estimateCStreamSize*()) + * nor other potential ways of using a ZSTD_CCtx* state. + * * When srcSize can be bound by a known and rather "small" value, - * this fact can be used to provide a tighter estimation - * because the CCtx compression context will need less memory. - * This tighter estimation can be provided by more advanced functions + * this knowledge can be used to provide a tighter budget estimation + * because the ZSTD_CCtx* state will need less memory for small inputs. + * This tighter estimation can be provided by employing more advanced functions * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. * - * Note 2 : only single-threaded compression is supported. + * Note : only single-threaded compression is supported. * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. */ -ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); /*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. + * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression + * using any compression level up to the max specified one. + * It will also consider src size to be arbitrarily "large", which is a worst case scenario. * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. + * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. + * Size estimates assume that no external sequence producer is registered. + * + * ZSTD_DStream memory budget depends on frame's window Size. * This information can be passed manually, using ZSTD_estimateDStreamSize, * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Any frame requesting a window size larger than max specified one will be rejected. 
* Note : if streaming is init with function ZSTD_init?Stream_usingDict(), * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); + * In this case, get total size by adding ZSTD_estimate?DictSize + */ +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); /*! ZSTD_estimate?DictSize() : @@ -1568,7 +1837,15 @@ typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; static __attribute__((__unused__)) + +#if defined(__clang__) && __clang_major__ >= 5 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ +#if defined(__clang__) && __clang_major__ >= 5 +#pragma clang diagnostic pop +#endif ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); @@ -1649,22 +1926,45 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); * This function never fails (wide contract) */ ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/*! ZSTD_CCtx_setCParams() : + * Set all parameters provided within @p cparams into the working @p cctx. + * Note : if modifying parameters during compression (MT mode only), + * note that changes to the .windowLog parameter will be ignored. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). + * On failure, no parameters are updated. + */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); + +/*! ZSTD_CCtx_setFParams() : + * Set all parameters provided within @p fparams into the working @p cctx. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). + */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams); + +/*! ZSTD_CCtx_setParams() : + * Set all parameters provided within @p params into the working @p cctx. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). + */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params); + /*! ZSTD_compress_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. * This prototype will generate compilation warnings. 
*/ ZSTD_DEPRECATED("use ZSTD_compress2") +ZSTDLIB_STATIC_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +ZSTDLIB_STATIC_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1725,7 +2025,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * See the comments on that enum for an explanation of the feature. */ #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 -/* Controlled with ZSTD_paramSwitch_e enum. +/* Controlled with ZSTD_ParamSwitch_e enum. * Default is ZSTD_ps_auto. * Set to ZSTD_ps_disable to never compress literals. * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals @@ -1737,11 +2037,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 - /* User's best guess of source size. * Hint is not valid when srcSizeHint == 0. * There is no guarantee that hint is close to actual source size, @@ -1808,13 +2103,16 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * Experimental parameter. * Default is 0 == disabled. Set to 1 to enable. * - * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same - * between calls, except for the modifications that zstd makes to pos (the - * caller must not modify pos). This is checked by the compressor, and - * compression will fail if it ever changes. This means the only flush - * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end - * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) - * MUST not be modified during compression or you will get data corruption. + * Tells the compressor that input data presented with ZSTD_inBuffer + * will ALWAYS be the same between calls. + * Technically, the @src pointer must never be changed, + * and the @pos field can only be updated by zstd. + * However, it's possible to increase the @size field, + * allowing scenarios where more data can be appended after compressions starts. + * These conditions are checked by the compressor, + * and compression will fail if they are not respected. + * Also, data in the ZSTD_inBuffer within the range [src, src + pos) + * MUST not be modified during compression or it will result in data corruption. * * When this flag is enabled zstd won't allocate an input window buffer, * because the user guarantees it can reference the ZSTD_inBuffer until @@ -1822,18 +2120,15 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * large enough to fit a block (see ZSTD_c_stableOutBuffer). 
This will also * avoid the memcpy() from the input buffer to the input window buffer. * - * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. - * That means this flag cannot be used with ZSTD_compressStream(). - * * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using * this flag is ALWAYS memory safe, and will never access out-of-bounds - * memory. However, compression WILL fail if you violate the preconditions. + * memory. However, compression WILL fail if conditions are not respected. * - * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST - * not be modified during compression or you will get data corruption. This - * is because zstd needs to reference data in the ZSTD_inBuffer to find + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST + * not be modified during compression or it will result in data corruption. + * This is because zstd needs to reference data in the ZSTD_inBuffer to find * matches. Normally zstd maintains its own window buffer for this purpose, - * but passing this flag tells zstd to use the user provided buffer. + * but passing this flag tells zstd to rely on user provided buffer instead. */ #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 @@ -1871,22 +2166,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo /* ZSTD_c_validateSequences * Default is 0 == disabled. Set to 1 to enable sequence validation. * - * For use with sequence compression API: ZSTD_compressSequences(). - * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * For use with sequence compression API: ZSTD_compressSequences*(). + * Designates whether or not provided sequences are validated within ZSTD_compressSequences*() * during function execution. * - * Without validation, providing a sequence that does not conform to the zstd spec will cause - * undefined behavior, and may produce a corrupted block. + * When Sequence validation is disabled (default), Sequences are compressed as-is, + * so they must correct, otherwise it would result in a corruption error. * - * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for + * Sequence validation adds some protection, by ensuring that all values respect boundary conditions. + * If a Sequence is detected invalid (see doc/zstd_compression_format.md for * specifics regarding offset/matchlength requirements) then the function will bail out and * return an error. - * */ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 -/* ZSTD_c_useBlockSplitter - * Controlled with ZSTD_paramSwitch_e enum. +/* ZSTD_c_blockSplitterLevel + * note: this parameter only influences the first splitter stage, + * which is active before producing the sequences. + * ZSTD_c_splitAfterSequences controls the next splitter stage, + * which is active after sequence production. + * Note that both can be combined. + * Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included. + * 0 means "auto", which will select a value depending on current ZSTD_c_strategy. + * 1 means no splitting. + * Then, values from 2 to 6 are sorted in increasing cpu load order. + * + * Note that currently the first block is never split, + * to ensure expansion guarantees in presence of incompressible data. 
+ */ +#define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6 +#define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20 + +/* ZSTD_c_splitAfterSequences + * This is a stronger splitter algorithm, + * based on actual sequences previously produced by the selected parser. + * It's also slower, and as a consequence, mostly used for high compression levels. + * While the post-splitter does overlap with the pre-splitter, + * both can nonetheless be combined, + * notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX, + * resulting in higher compression ratio than just one of them. + * * Default is ZSTD_ps_auto. * Set to ZSTD_ps_disable to never use block splitter. * Set to ZSTD_ps_enable to always use block splitter. @@ -1894,10 +2213,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use * block splitting based on the compression parameters. */ -#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13 +#define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13 /* ZSTD_c_useRowMatchFinder - * Controlled with ZSTD_paramSwitch_e enum. + * Controlled with ZSTD_ParamSwitch_e enum. * Default is ZSTD_ps_auto. * Set to ZSTD_ps_disable to never use row-based matchfinder. * Set to ZSTD_ps_enable to force usage of row-based matchfinder. @@ -1928,6 +2247,80 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 +/* ZSTD_c_prefetchCDictTables + * Controlled with ZSTD_ParamSwitch_e enum. Default is ZSTD_ps_auto. + * + * In some situations, zstd uses CDict tables in-place rather than copying them + * into the working context. (See docs on ZSTD_dictAttachPref_e above for details). + * In such situations, compression speed is seriously impacted when CDict tables are + * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables + * when they are used in-place. + * + * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit. + * For sufficiently large inputs, zstd will by default memcpy() CDict tables + * into the working context, so there is no need to prefetch. This parameter is + * targeted at a middle range of input sizes, where a prefetch is cheap enough to be + * useful but memcpy() is too expensive. The exact range of input sizes where this + * makes sense is best determined by careful experimentation. + * + * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable, + * but in the future zstd may conditionally enable this feature via an auto-detection + * heuristic for cold CDicts. + * Use ZSTD_ps_disable to opt out of prefetching under any circumstances. + */ +#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 + +/* ZSTD_c_enableSeqProducerFallback + * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. + * + * Controls whether zstd will fall back to an internal sequence producer if an + * external sequence producer is registered and returns an error code. This fallback + * is block-by-block: the internal sequence producer will only be called for blocks + * where the external sequence producer returns an error code. Fallback parsing will + * follow any other cParam settings, such as compression level, the same as in a + * normal (fully-internal) compression operation. 
+ * + * The user is strongly encouraged to read the full Block-Level Sequence Producer API + * documentation (below) before setting this parameter. */ +#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17 + +/* ZSTD_c_maxBlockSize + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). + * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. + * + * This parameter can be used to set an upper bound on the blocksize + * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper + * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make + * compressBound() inaccurate). Only currently meant to be used for testing. + */ +#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18 + +/* ZSTD_c_repcodeResolution + * This parameter only has an effect if ZSTD_c_blockDelimiters is + * set to ZSTD_sf_explicitBlockDelimiters (may change in the future). + * + * This parameter affects how zstd parses external sequences, + * provided via the ZSTD_compressSequences*() API + * or from an external block-level sequence producer. + * + * If set to ZSTD_ps_enable, the library will check for repeated offsets within + * external sequences, even if those repcodes are not explicitly indicated in + * the "rep" field. Note that this is the only way to exploit repcode matches + * while using compressSequences*() or an external sequence producer, since zstd + * currently ignores the "rep" field of external sequences. + * + * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in + * external sequences, regardless of whether the "rep" field has been set. This + * reduces sequence compression overhead by about 25% while sacrificing some + * compression ratio. + * + * The default value is ZSTD_ps_auto, for which the library will enable/disable + * based on compression level (currently: level<10 disables, level>=10 enables). + */ +#define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19 +#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */ + + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -2084,7 +2477,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete * in the range [dst, dst + pos) MUST not be modified during decompression * or you will get data corruption. * - * When this flags is enabled zstd won't allocate an output buffer, because + * When this flag is enabled zstd won't allocate an output buffer, because * it can write directly to the ZSTD_outBuffer, but it will still allocate * an input buffer large enough to fit any compressed block. This will also * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. @@ -2137,6 +2530,33 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete */ #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 +/* ZSTD_d_disableHuffmanAssembly + * Set to 1 to disable the Huffman assembly implementation. + * The default value is 0, which allows zstd to use the Huffman assembly + * implementation if available. + * + * This parameter can be used to disable Huffman assembly at runtime. + * If you want to disable it at compile time you can define the macro + * ZSTD_DISABLE_ASM. + */ +#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 + +/* ZSTD_d_maxBlockSize + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). 
+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. + * + * Forces the decompressor to reject blocks whose content size is + * larger than the configured maxBlockSize. When maxBlockSize is + * larger than the windowSize, the windowSize is used instead. + * This saves memory on the decoder when you know all blocks are small. + * + * This option is typically used in conjunction with ZSTD_c_maxBlockSize. + * + * WARNING: This causes the decoder to reject otherwise valid frames + * that have block sizes larger than the configured maxBlockSize. + */ +#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6 + /*! ZSTD_DCtx_setFormat() : * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). @@ -2145,6 +2565,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! ZSTD_decompressStream_simpleArgs() : @@ -2181,6 +2602,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs ( * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); @@ -2198,17 +2620,15 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /*! ZSTD_initCStream_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } + * ZSTD_CCtx_setParams(zcs, params); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); * @@ -2218,6 +2638,7 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, @@ -2232,15 +2653,13 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /*! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 
- * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } + * ZSTD_CCtx_setFParams(zcs, fParams); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_refCDict(zcs, cdict); * @@ -2250,6 +2669,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, @@ -2264,7 +2684,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * explicitly specified. * * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * This is typically useful to skip dictionary loading stage, since it will reuse it in-place. * Note that zcs must be init at least once before using ZSTD_resetCStream(). * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. @@ -2274,6 +2694,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); @@ -2319,8 +2740,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); * * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /*! @@ -2330,8 +2751,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const vo * ZSTD_DCtx_refDDict(zds, ddict); * * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /*! @@ -2339,18 +2760,202 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z * * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * - * re-use decompression parameters from previous init; saves dictionary loading - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * reuse decompression parameters from previous init; saves dictionary loading */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); +/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* + * + * *** OVERVIEW *** + * The Block-Level Sequence Producer API allows users to provide their own custom + * sequence producer which libzstd invokes to process each block. 
The produced list + * of sequences (literals and matches) is then post-processed by libzstd to produce + * valid compressed blocks. + * + * This block-level offload API is a more granular complement of the existing + * frame-level offload API compressSequences() (introduced in v1.5.1). It offers + * an easier migration story for applications already integrated with libzstd: the + * user application continues to invoke the same compression functions + * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits + * from the specific advantages of the external sequence producer. For example, + * the sequence producer could be tuned to take advantage of known characteristics + * of the input, to offer better speed / ratio, or could leverage hardware + * acceleration not available within libzstd itself. + * + * See contrib/externalSequenceProducer for an example program employing the + * Block-Level Sequence Producer API. + * + * *** USAGE *** + * The user is responsible for implementing a function of type + * ZSTD_sequenceProducer_F. For each block, zstd will pass the following + * arguments to the user-provided function: + * + * - sequenceProducerState: a pointer to a user-managed state for the sequence + * producer. + * + * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. + * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory + * backing outSeqs is managed by the CCtx. + * + * - src, srcSize: an input buffer for the sequence producer to parse. + * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. + * + * - dict, dictSize: a history buffer, which may be empty, which the sequence + * producer may reference as it parses the src buffer. Currently, zstd will + * always pass dictSize == 0 into external sequence producers, but this will + * change in the future. + * + * - compressionLevel: a signed integer representing the zstd compression level + * set by the user for the current operation. The sequence producer may choose + * to use this information to change its compression strategy and speed/ratio + * tradeoff. Note: the compression level does not reflect zstd parameters set + * through the advanced API. + * + * - windowSize: a size_t representing the maximum allowed offset for external + * sequences. Note that sequence offsets are sometimes allowed to exceed the + * windowSize if a dictionary is present, see doc/zstd_compression_format.md + * for details. + * + * The user-provided function shall return a size_t representing the number of + * sequences written to outSeqs. This return value will be treated as an error + * code if it is greater than outSeqsCapacity. The return value must be non-zero + * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided + * for convenience, but any value greater than outSeqsCapacity will be treated as + * an error code. + * + * If the user-provided function does not return an error code, the sequences + * written to outSeqs must be a valid parse of the src buffer. Data corruption may + * occur if the parse is not valid. A parse is defined to be valid if the + * following conditions hold: + * - The sum of matchLengths and literalLengths must equal srcSize. + * - All sequences in the parse, except for the final sequence, must have + * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have + * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. + * - All offsets must respect the windowSize parameter as specified in + * doc/zstd_compression_format.md. 
+ * - If the final sequence has matchLength == 0, it must also have offset == 0. + * + * zstd will only validate these conditions (and fail compression if they do not + * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence + * validation has a performance cost. + * + * If the user-provided function returns an error, zstd will either fall back + * to an internal sequence producer or fail the compression operation. The user can + * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback + * cParam. Fallback compression will follow any other cParam settings, such as + * compression level, the same as in a normal compression operation. + * + * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F + * function by calling + * ZSTD_registerSequenceProducer(cctx, + * sequenceProducerState, + * sequenceProducer) + * This setting will persist until the next parameter reset of the CCtx. + * + * The sequenceProducerState must be initialized by the user before calling + * ZSTD_registerSequenceProducer(). The user is responsible for destroying the + * sequenceProducerState. + * + * *** LIMITATIONS *** + * This API is compatible with all zstd compression APIs which respect advanced parameters. + * However, there are three limitations: + * + * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. + * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level + * external sequence producer. + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some + * cases (see its documentation for details). Users must explicitly set + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external + * sequence producer is registered. + * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default + * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence + * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). + * + * Second, history buffers are not currently supported. Concretely, zstd will always pass + * dictSize == 0 to the external sequence producer (for now). This has two implications: + * - Dictionaries are not currently supported. Compression will *not* fail if the user + * references a dictionary, but the dictionary won't have any effect. + * - Stream history is not currently supported. All advanced compression APIs, including + * streaming APIs, work with external sequence producers, but each block is treated as + * an independent chunk without history from previous blocks. + * + * Third, multi-threading within a single compression is not currently supported. In other words, + * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. + * Multi-threading across compressions is fine: simply create one CCtx per thread. + * + * Long-term, we plan to overcome all three limitations. There is no technical blocker to + * overcoming them. It is purely a question of engineering effort. + */ + +#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) + +typedef size_t (*ZSTD_sequenceProducer_F) ( + void* sequenceProducerState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +); + +/*! 
ZSTD_registerSequenceProducer() : + * Instruct zstd to use a block-level external sequence producer function. + * + * The sequenceProducerState must be initialized by the caller, and the caller is + * responsible for managing its lifetime. This parameter is sticky across + * compressions. It will remain set until the user explicitly resets compression + * parameters. + * + * Sequence producer registration is considered to be an "advanced parameter", + * part of the "advanced API". This means it will only have an effect on compression + * APIs which respect advanced parameters, such as compress2() and compressStream2(). + * Older compression APIs such as compressCCtx(), which predate the introduction of + * "advanced parameters", will ignore any external sequence producer setting. + * + * The sequence producer can be "cleared" by registering a NULL function pointer. This + * removes all limitations described above in the "LIMITATIONS" section of the API docs. + * + * The user is strongly encouraged to read the full API documentation (above) before + * calling this function. */ +ZSTDLIB_STATIC_API void +ZSTD_registerSequenceProducer( + ZSTD_CCtx* cctx, + void* sequenceProducerState, + ZSTD_sequenceProducer_F sequenceProducer +); + +/*! ZSTD_CCtxParams_registerSequenceProducer() : + * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. + * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), + * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). + * + * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() + * is required, then this function is for you. Otherwise, you probably don't need it. + * + * See tests/zstreamtest.c for example usage. */ +ZSTDLIB_STATIC_API void +ZSTD_CCtxParams_registerSequenceProducer( + ZSTD_CCtx_params* params, + void* sequenceProducerState, + ZSTD_sequenceProducer_F sequenceProducer +); + + /* ******************************************************************* -* Buffer-less and synchronous inner streaming functions +* Buffer-less and synchronous inner streaming functions (DEPRECATED) +* +* This API is deprecated, and will be removed in a future version. +* It allows streaming (de)compression with user allocated buffers. +* However, it is hard to use, and not as well tested as the rest of +* our API. * -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. +* Please use the normal streaming API instead: ZSTD_compressStream2, +* and ZSTD_decompressStream. +* If there is functionality that you need, but it doesn't provide, +* please open an issue on our GitHub. ********************************************************************* */ /* @@ -2358,11 +2963,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. + ZSTD_CCtx object can be reused multiple times within successive compression operations. Start by initializing a context. Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. 
- It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). There are some important considerations to keep in mind when using this advanced function : @@ -2380,39 +2984,49 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. + `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again. */ /*===== Buffer-less streaming compression functions =====*/ +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ -ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.") +ZSTDLIB_STATIC_API +size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ ZSTD_DEPRECATED("use advanced API to access custom parameters") +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTD_DEPRECATED("use advanced API to access custom parameters") +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ /* Buffer-less streaming decompression (synchronous mode) A ZSTD_DCtx object is required to track streaming operations. Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. 
+ A ZSTD_DCtx object can be reused multiple times. First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. Data fragment must be large enough to ensure successful decoding. `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least result bytes on next attempt. errorCode, which can be tested using ZSTD_isError(). - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame, such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. As a consequence, check that values remain within valid application range. @@ -2428,7 +3042,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ The most memory efficient way is to use a round buffer of sufficient size. Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). + which can return an error code if required value is too large for current system (in 32-bits mode). In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, up to the moment there is not enough room left in the buffer to guarantee decoding another full block, which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. @@ -2448,7 +3062,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. It can also be an error code, which can be tested with ZSTD_isError(). @@ -2471,27 +3085,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ */ /*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; -/*! ZSTD_getFrameHeader() : - * decode Frame Header, or requires larger `srcSize`. 
 
 /*=====   Buffer-less streaming decompression functions  =====*/
-typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
-typedef struct {
-    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
-    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
-    unsigned blockSizeMax;
-    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
-    unsigned headerSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameHeader;
-/*! ZSTD_getFrameHeader() :
- *  decode Frame Header, or requires larger `srcSize`.
- * @return : 0, `zfhPtr` is correctly filled,
- *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
- *           or an error code, which can be tested using ZSTD_isError() */
-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */
-/*! ZSTD_getFrameHeader_advanced() :
- *  same as ZSTD_getFrameHeader(),
- *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
-ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
 
 ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
 
 ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
@@ -2502,6 +3096,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
 ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 /* misc */
+ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.")
 ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
 ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
@@ -2509,11 +3104,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
 
 
-/* ============================ */
-/*       Block level API       */
-/* ============================ */
+/* ========================================= */
+/*       Block level API (DEPRECATED)        */
+/* ========================================= */
 
 /*!
+
+    This API is deprecated in favor of the regular compression API.
+    You can get the frame header down to 2 bytes by setting:
+      - ZSTD_c_format = ZSTD_f_zstd1_magicless
+      - ZSTD_c_contentSizeFlag = 0
+      - ZSTD_c_checksumFlag = 0
+      - ZSTD_c_dictIDFlag = 0
+
+    This API is not as well tested as our normal API, so we recommend not using it.
+    We will be removing it in a future version. If the normal API doesn't provide
+    the functionality you need, please open a GitHub issue.
+
     Block functions produce and decode raw zstd blocks, without frame metadata.
     Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
@@ -2524,7 +3131,6 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
     - It is necessary to init context before starting
       + compression : any ZSTD_compressBegin*() variant, including with dictionary
       + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
-      + copyCCtx() and copyDCtx() can be used too
     - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
       + If input is larger than a block size, it's necessary to split input data into multiple blocks
       + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
@@ -2541,11 +3147,14 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
 */
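The new comment above recommends the regular compression API with a magicless, minimal frame header as the replacement for raw blocks. A small sketch of that configuration follows (editorial illustration, not part of the patch); ZSTD_c_format and ZSTD_f_zstd1_magicless still require ZSTD_STATIC_LINKING_ONLY, and error checking on the setters is omitted for brevity:

    /* Illustrative sketch only; not part of the patch above. */
    #define ZSTD_STATIC_LINKING_ONLY   /* for ZSTD_c_format / ZSTD_f_zstd1_magicless */
    #include <zstd.h>

    static size_t compress_with_minimal_header(ZSTD_CCtx *cctx,
                                               void *dst, size_t dstCapacity,
                                               const void *src, size_t srcSize)
    {
        /* Shrink the frame header to roughly 2 bytes, as described above. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, 0);

        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }

The decoder side has to be told about the missing magic number as well, e.g. via ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless), before decompressing such frames.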
 
 /*=====   Raw zstd block functions  =====*/
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
 ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
 ZSTDLIB_STATIC_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
 ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.")
 ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
 
 #endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
-
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index d961ead91bf1..30c193a1207e 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -26,7 +26,7 @@ struct zswap_lruvec_state {
 
 unsigned long zswap_total_pages(void);
 bool zswap_store(struct folio *folio);
-bool zswap_load(struct folio *folio);
+int zswap_load(struct folio *folio);
 void zswap_invalidate(swp_entry_t swp);
 int zswap_swapon(int type, unsigned long nr_pages);
 void zswap_swapoff(int type);
@@ -44,9 +44,9 @@ static inline bool zswap_store(struct folio *folio)
 	return false;
 }
 
-static inline bool zswap_load(struct folio *folio)
+static inline int zswap_load(struct folio *folio)
 {
-	return false;
+	return -ENOENT;
 }
 
 static inline void zswap_invalidate(swp_entry_t swp) {}