diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2025-01-21 08:37:39 +0300 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2025-01-21 08:37:39 +0300 |
| commit | 25768de50b1f2dbb6ea44bd5148a87fe2c9c3688 (patch) | |
| tree | 91f4e0c1ea9acb1e8d477a5f4dfedd00de67ae13 /include/linux | |
| parent | 3a6e5ed2372bcb2a3c554fda32419efd91ff9b0c (diff) | |
| parent | 08bd5b7c9a2401faabdaa1472d45c7de0755fd7e (diff) | |
| download | linux-25768de50b1f2dbb6ea44bd5148a87fe2c9c3688.tar.xz | |
Merge branch 'next' into for-linus
Prepare input updates for 6.14 merge window.
Diffstat (limited to 'include/linux')
534 files changed, 15253 insertions, 6682 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..6adcd1b92b20 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -40,7 +40,7 @@ struct irq_domain_ops; #include <asm/acpi.h> #ifdef CONFIG_ACPI_TABLE_LIB -#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, "ACPI") #define __init_or_acpilib #define __initdata_or_acpilib #else @@ -107,6 +107,7 @@ enum acpi_irq_model_id { ACPI_IRQ_MODEL_PLATFORM, ACPI_IRQ_MODEL_GIC, ACPI_IRQ_MODEL_LPIC, + ACPI_IRQ_MODEL_RINTC, ACPI_IRQ_MODEL_COUNT }; @@ -362,6 +363,7 @@ void acpi_unregister_gsi (u32 gsi); struct pci_dev; +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); @@ -386,7 +388,7 @@ extern bool acpi_is_pnp_device(struct acpi_device *); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) -typedef void (*wmi_notify_handler) (u32 value, void *context); +typedef void (*wmi_notify_handler) (union acpi_object *data, void *context); int wmi_instance_count(const char *guid); @@ -401,7 +403,6 @@ extern acpi_status wmi_set_block(const char *guid, u8 instance, extern acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, void *data); extern acpi_status wmi_remove_notify_handler(const char *guid); -extern acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out); extern bool wmi_has_guid(const char *guid); extern char *wmi_get_acpi_device_uid(const char *guid); @@ -1163,8 +1164,6 @@ int acpi_subsys_suspend_noirq(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); -void acpi_ec_mark_gpe_for_wake(void); -void acpi_ec_set_gpe_wake_mask(u8 action); int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } @@ -1175,6 +1174,12 @@ static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } +#endif + +#if defined(CONFIG_ACPI_EC) && defined(CONFIG_PM_SLEEP) +void acpi_ec_mark_gpe_for_wake(void); +void acpi_ec_set_gpe_wake_mask(u8 action); +#else static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif @@ -1343,6 +1348,8 @@ struct acpi_probe_entry { kernel_ulong_t driver_data; }; +void arch_sort_irqchip_probe(struct acpi_probe_entry *ap_head, int nr); + #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, \ valid, data, fn) \ static const struct acpi_probe_entry __acpi_probe_##name \ @@ -1523,11 +1530,7 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu) } #endif -#ifdef CONFIG_ARM64 -void acpi_arm_init(void); -#else -static inline void acpi_arm_init(void) { } -#endif +void acpi_arch_init(void); #ifdef CONFIG_ACPI_PCC void acpi_init_pcc(void); diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index 72cedb916a9c..e748b2877602 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -11,10 +11,11 @@ #ifndef __LINUX_ACPI_DMA_H #define __LINUX_ACPI_DMA_H -#include <linux/list.h> -#include <linux/device.h> #include <linux/err.h> #include <linux/dmaengine.h> +#include <linux/types.h> + +struct device; /** * struct acpi_dma_spec - slave device DMA resources @@ -65,7 +66,6 @@ int devm_acpi_dma_controller_register(struct device *dev, struct dma_chan *(*acpi_dma_xlate) (struct acpi_dma_spec *, struct acpi_dma *), void *data); -void devm_acpi_dma_controller_free(struct device *dev); struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, size_t index); @@ -94,9 +94,6 @@ static inline int devm_acpi_dma_controller_register(struct device *dev, { return -ENODEV; } -static inline void devm_acpi_dma_controller_free(struct device *dev) -{ -} static inline struct dma_chan *acpi_dma_request_slave_chan_by_index( struct device *dev, size_t index) diff --git a/include/linux/acpi_pmtmr.h b/include/linux/acpi_pmtmr.h index 50d88bf1498d..0ded9220d379 100644 --- a/include/linux/acpi_pmtmr.h +++ b/include/linux/acpi_pmtmr.h @@ -26,6 +26,19 @@ static inline u32 acpi_pm_read_early(void) return acpi_pm_read_verified() & ACPI_PM_MASK; } +/** + * Register callback for suspend and resume event + * + * @cb Callback triggered on suspend and resume + * @data Data passed with the callback + */ +void acpi_pmtmr_register_suspend_resume_callback(void (*cb)(void *data, bool suspend), void *data); + +/** + * Remove registered callback for suspend and resume event + */ +void acpi_pmtmr_unregister_suspend_resume_callback(void); + #else static inline u32 acpi_pm_read_early(void) diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 05e758b8b894..3ffa5341dce2 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -20,12 +20,6 @@ enum alarmtimer_type { ALARM_BOOTTIME_FREEZER, }; -enum alarmtimer_restart { - ALARMTIMER_NORESTART, - ALARMTIMER_RESTART, -}; - - #define ALARMTIMER_STATE_INACTIVE 0x00 #define ALARMTIMER_STATE_ENQUEUED 0x01 @@ -42,14 +36,14 @@ enum alarmtimer_restart { struct alarm { struct timerqueue_node node; struct hrtimer timer; - enum alarmtimer_restart (*function)(struct alarm *, ktime_t now); + void (*function)(struct alarm *, ktime_t now); enum alarmtimer_type type; int state; void *data; }; void alarm_init(struct alarm *alarm, enum alarmtimer_type type, - enum alarmtimer_restart (*function)(struct alarm *, ktime_t)); + void (*function)(struct alarm *, ktime_t)); void alarm_start(struct alarm *alarm, ktime_t start); void alarm_start_relative(struct alarm *alarm, ktime_t start); void alarm_restart(struct alarm *alarm); diff --git a/include/linux/alcor_pci.h b/include/linux/alcor_pci.h index c4a0b23846d8..dcb1d37dabc2 100644 --- a/include/linux/alcor_pci.h +++ b/include/linux/alcor_pci.h @@ -11,6 +11,7 @@ #define ALCOR_SD_CARD 0 #define ALCOR_MS_CARD 1 +#define DRV_NAME_ALCOR_PCI "alcor_pci" #define DRV_NAME_ALCOR_PCI_SDMMC "alcor_sdmmc" #define DRV_NAME_ALCOR_PCI_MS "alcor_ms" diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index 8c61ccd161ba..7c0786bdf9af 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -30,6 +30,21 @@ struct alloc_tag { struct alloc_tag_counters __percpu *counters; } __aligned(8); +struct alloc_tag_kernel_section { + struct alloc_tag *first_tag; + unsigned long count; +}; + +struct alloc_tag_module_section { + union { + unsigned long start_addr; + struct alloc_tag *first_tag; + }; + unsigned long end_addr; + /* used size */ + unsigned long size; +}; + #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG #define CODETAG_EMPTY ((void *)1) @@ -54,6 +69,8 @@ static inline void set_codetag_empty(union codetag_ref *ref) {} #ifdef CONFIG_MEM_ALLOC_PROFILING +#define ALLOC_TAG_SECTION_NAME "alloc_tags" + struct codetag_bytes { struct codetag *ct; s64 bytes; @@ -70,13 +87,13 @@ static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct) /* * When percpu variables are required to be defined as weak, static percpu * variables can't be used inside a function (see comments for DECLARE_PER_CPU_SECTION). - * Instead we will accound all module allocations to a single counter. + * Instead we will account all module allocations to a single counter. */ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_shared_alloc_tag }; @@ -85,7 +102,7 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ - __section("alloc_tags") = { \ + __section(ALLOC_TAG_SECTION_NAME) = { \ .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; @@ -135,9 +152,21 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {} #endif /* Caller should verify both ref and tag to be valid */ -static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) { + alloc_tag_add_check(ref, tag); + if (!ref || !tag) + return false; + ref->ct = &tag->ct; + return true; +} + +static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) +{ + if (unlikely(!__alloc_tag_ref_set(ref, tag))) + return false; + /* * We need in increment the call counter every time we have a new * allocation or when we split a large allocation into smaller ones. @@ -145,25 +174,13 @@ static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag * counter because when we free each part the counter will be decremented. */ this_cpu_inc(tag->counters->calls); -} - -static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) -{ - alloc_tag_add_check(ref, tag); - if (!ref || !tag) - return; - - __alloc_tag_ref_set(ref, tag); + return true; } static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) { - alloc_tag_add_check(ref, tag); - if (!ref || !tag) - return; - - __alloc_tag_ref_set(ref, tag); - this_cpu_add(tag->counters->bytes, bytes); + if (likely(alloc_tag_ref_set(ref, tag))) + this_cpu_add(tag->counters->bytes, bytes); } static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 958a55bcc708..9946276aff73 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -105,7 +105,7 @@ enum amba_vendor { AMBA_VENDOR_LSI = 0xb6, }; -extern struct bus_type amba_bustype; +extern const struct bus_type amba_bustype; #define to_amba_device(d) container_of_const(d, struct amba_device, dev) @@ -121,6 +121,7 @@ extern struct bus_type amba_bustype; #ifdef CONFIG_ARM_AMBA int __amba_driver_register(struct amba_driver *, struct module *); void amba_driver_unregister(struct amba_driver *); +bool dev_is_amba(const struct device *dev); #else static inline int __amba_driver_register(struct amba_driver *drv, struct module *owner) @@ -130,6 +131,10 @@ static inline int __amba_driver_register(struct amba_driver *drv, static inline void amba_driver_unregister(struct amba_driver *drv) { } +static inline bool dev_is_amba(const struct device *dev) +{ + return false; +} #endif struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index b721f360d759..2222e8b03ff4 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -11,10 +11,6 @@ void topology_normalize_cpu_scale(void); int topology_update_cpu_topology(void); -#ifdef CONFIG_ACPI_CPPC_LIB -void topology_init_cpu_capacity_cppc(void); -#endif - struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); @@ -49,6 +45,7 @@ enum scale_freq_source { SCALE_FREQ_SOURCE_CPUFREQ = 0, SCALE_FREQ_SOURCE_ARCH, SCALE_FREQ_SOURCE_CPPC, + SCALE_FREQ_SOURCE_VIRT, }; struct scale_freq_data { diff --git a/include/linux/args.h b/include/linux/args.h index 8ff60a54eb7d..2e8e65d975c7 100644 --- a/include/linux/args.h +++ b/include/linux/args.h @@ -17,9 +17,9 @@ * that as _n. */ -/* This counts to 12. Any more, it will return 13th argument. */ -#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) /* Concatenate two parameters, but allow them to be expanded beforehand. */ #define __CONCAT(a, b) a ## b diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 083f85653716..67f6fdf2e7cd 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -115,6 +115,70 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 +/* Start of pKVM hypercall range */ +#define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 +#define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 +#define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_5 5 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_6 6 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD 7 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_8 8 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_9 9 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_10 10 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_11 11 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_12 12 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_13 13 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_14 14 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_15 15 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_16 16 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_17 17 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_18 18 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_19 19 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_20 20 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_21 21 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_22 22 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_23 23 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_24 24 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_25 25 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_26 26 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_27 27 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_28 28 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_29 29 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_30 30 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_31 31 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_32 32 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_33 33 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_34 34 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_35 35 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_36 36 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_37 37 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_38 38 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_39 39 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_40 40 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_41 41 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_42 42 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_43 43 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_44 44 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_45 45 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_46 46 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_47 47 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_48 48 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_49 49 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_50 50 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_51 51 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_52 52 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_53 53 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_54 54 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_55 55 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_56 56 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_57 57 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_58 58 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_59 59 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_60 60 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_61 61 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_62 62 +#define ARM_SMCCC_KVM_FUNC_PKVM_RESV_63 63 +/* End of pKVM hypercall range */ #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -137,6 +201,30 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_PTP) +#define ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_HYP_MEMINFO) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_SHARE) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_UNSHARE) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 @@ -227,8 +315,6 @@ u32 arm_smccc_get_version(void); void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit); -extern u64 smccc_has_sve_hint; - /** * arm_smccc_get_soc_id_version() * @@ -327,15 +413,6 @@ struct arm_smccc_quirk { }; /** - * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls - * - * Sets the SMCCC hint bit to indicate if there is live state in the SVE - * registers, this modifies x0 in place and should never be called from C - * code. - */ -asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0); - -/** * __arm_smccc_smc() - make SMC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 @@ -402,20 +479,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #endif -/* nVHE hypervisor doesn't have a current thread so needs separate checks */ -#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__) - -#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \ - ARM64_SVE) -#define smccc_sve_clobbers "x16", "x30", "cc", - -#else - -#define SMCCC_SVE_CHECK -#define smccc_sve_clobbers - -#endif - #define __constraint_read_2 "r" (arg0) #define __constraint_read_3 __constraint_read_2, "r" (arg1) #define __constraint_read_4 __constraint_read_3, "r" (arg2) @@ -486,12 +549,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, register unsigned long r3 asm("r3"); \ CONCATENATE(__declare_arg_, \ COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \ - asm volatile(SMCCC_SVE_CHECK \ - inst "\n" : \ + asm volatile(inst "\n" : \ "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \ : CONCATENATE(__constraint_read_, \ COUNT_ARGS(__VA_ARGS__)) \ - : smccc_sve_clobbers "memory"); \ + : "memory"); \ if (___res) \ *___res = (typeof(*___res)){r0, r1, r2, r3}; \ } while (0) @@ -540,7 +602,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, asm ("" : \ : CONCATENATE(__constraint_read_, \ COUNT_ARGS(__VA_ARGS__)) \ - : smccc_sve_clobbers "memory"); \ + : "memory"); \ if (___res) \ ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ } while (0) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 89683f31ae12..a28e2a6a13d0 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -73,6 +73,11 @@ #define FFA_FN64_MEM_PERM_GET FFA_SMC_64(0x88) #define FFA_MEM_PERM_SET FFA_SMC_32(0x89) #define FFA_FN64_MEM_PERM_SET FFA_SMC_64(0x89) +#define FFA_CONSOLE_LOG FFA_SMC_32(0x8A) +#define FFA_PARTITION_INFO_GET_REGS FFA_SMC_64(0x8B) +#define FFA_EL3_INTR_HANDLE FFA_SMC_32(0x8C) +#define FFA_MSG_SEND_DIRECT_REQ2 FFA_SMC_64(0x8D) +#define FFA_MSG_SEND_DIRECT_RESP2 FFA_SMC_64(0x8E) /* * For some calls it is necessary to use SMC64 to pass or return 64-bit values. @@ -265,6 +270,11 @@ struct ffa_indirect_msg_hdr { u32 size; }; +/* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which pass data via registers */ +struct ffa_send_direct_data2 { + unsigned long data[14]; /* x4-x17 */ +}; + struct ffa_mem_region_addr_range { /* The base IPA of the constituent memory region, aligned to 4 kiB */ u64 address; @@ -426,6 +436,8 @@ struct ffa_msg_ops { int (*sync_send_receive)(struct ffa_device *dev, struct ffa_send_direct_data *data); int (*indirect_send)(struct ffa_device *dev, void *buf, size_t sz); + int (*sync_send_receive2)(struct ffa_device *dev, const uuid_t *uuid, + struct ffa_send_direct_data2 *data); }; struct ffa_mem_ops { diff --git a/include/linux/asn1_decoder.h b/include/linux/asn1_decoder.h index 83f9c6e1e5e9..b41bce82a191 100644 --- a/include/linux/asn1_decoder.h +++ b/include/linux/asn1_decoder.h @@ -9,6 +9,7 @@ #define _LINUX_ASN1_DECODER_H #include <linux/asn1.h> +#include <linux/types.h> struct asn1_decoder; diff --git a/include/linux/asn1_encoder.h b/include/linux/asn1_encoder.h index 08cd0c2ad34f..d17484dffb74 100644 --- a/include/linux/asn1_encoder.h +++ b/include/linux/asn1_encoder.h @@ -6,7 +6,6 @@ #include <linux/types.h> #include <linux/asn1.h> #include <linux/asn1_ber_bytecode.h> -#include <linux/bug.h> #define asn1_oid_len(oid) (sizeof(oid)/sizeof(u32)) unsigned char * diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h deleted file mode 100644 index 76860a461ed2..000000000000 --- a/include/linux/ath9k_platform.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2008 Atheros Communications Inc. - * Copyright (c) 2009 Gabor Juhos <juhosg@openwrt.org> - * Copyright (c) 2009 Imre Kaloz <kaloz@openwrt.org> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef _LINUX_ATH9K_PLATFORM_H -#define _LINUX_ATH9K_PLATFORM_H - -#define ATH9K_PLAT_EEP_MAX_WORDS 2048 - -struct ath9k_platform_data { - const char *eeprom_name; - - u16 eeprom_data[ATH9K_PLAT_EEP_MAX_WORDS]; - u8 *macaddr; - - int led_pin; - u32 gpio_mask; - u32 gpio_val; - - u32 bt_active_pin; - u32 bt_priority_pin; - u32 wlan_active_pin; - - bool endian_check; - bool is_clk_25mhz; - bool tx_gain_buffalo; - bool disable_2ghz; - bool disable_5ghz; - bool led_active_high; - - int (*get_mac_revision)(void); - int (*external_reset)(void); - - bool use_eeprom; -}; - -#endif /* _LINUX_ATH9K_PLATFORM_H */ diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index e4004d1e6725..b3643de9931d 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -61,14 +61,8 @@ int attribute_container_device_trigger_safe(struct device *dev, int (*undo)(struct attribute_container *, struct device *, struct device *)); -void attribute_container_trigger(struct device *dev, - int (*fn)(struct attribute_container *, - struct device *)); int attribute_container_add_attrs(struct device *classdev); int attribute_container_add_class_device(struct device *classdev); -int attribute_container_add_class_device_adapter(struct attribute_container *cont, - struct device *dev, - struct device *classdev); void attribute_container_remove_attrs(struct device *classdev); void attribute_container_class_device_del(struct device *classdev); struct attribute_container *attribute_container_classdev_to_container(struct device *); diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 662b8ae54b6a..65dd7f154374 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -269,8 +269,4 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); #define module_auxiliary_driver(__auxiliary_driver) \ module_driver(__auxiliary_driver, auxiliary_driver_register, auxiliary_driver_unregister) -struct auxiliary_device *auxiliary_find_device(struct device *start, - const void *data, - int (*match)(struct device *dev, const void *data)); - #endif /* _AUXILIARY_BUS_H_ */ diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 8e177b67e82f..13a11f3c09b8 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -89,6 +89,9 @@ enum virtchnl_rx_hsplit { VIRTCHNL_RX_HSPLIT_SPLIT_SCTP = 8, }; +enum virtchnl_bw_limit_type { + VIRTCHNL_BW_SHAPER = 0, +}; /* END GENERIC DEFINES */ /* Opcodes for VF-PF communication. These are placed in the v_opcode field @@ -151,6 +154,11 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, + /* opcode 57 - 65 are reserved */ + VIRTCHNL_OP_GET_QOS_CAPS = 66, + /* opcode 68 through 111 are reserved */ + VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, + VIRTCHNL_OP_CONFIG_QUANTA = 113, VIRTCHNL_OP_MAX, }; @@ -247,6 +255,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); /* used to negotiate communicating link speeds in Mbps */ #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED BIT(7) #define VIRTCHNL_VF_OFFLOAD_CRC BIT(10) +#define VIRTCHNL_VF_OFFLOAD_TC_U32 BIT(11) #define VIRTCHNL_VF_OFFLOAD_VLAN_V2 BIT(15) #define VIRTCHNL_VF_OFFLOAD_VLAN BIT(16) #define VIRTCHNL_VF_OFFLOAD_RX_POLLING BIT(17) @@ -260,6 +269,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) +#define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -1121,6 +1131,7 @@ enum virtchnl_vfr_states { }; #define VIRTCHNL_MAX_NUM_PROTO_HDRS 32 +#define VIRTCHNL_MAX_SIZE_RAW_PACKET 1024 #define PROTO_HDR_SHIFT 5 #define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT) #define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1) @@ -1266,13 +1277,22 @@ struct virtchnl_proto_hdrs { u8 pad[3]; /** * specify where protocol header start from. + * must be 0 when sending a raw packet request. * 0 - from the outer layer * 1 - from the first inner layer * 2 - from the second inner layer * .... **/ int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ - struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + union { + struct virtchnl_proto_hdr + proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; + struct { + u16 pkt_len; + u8 spec[VIRTCHNL_MAX_SIZE_RAW_PACKET]; + u8 mask[VIRTCHNL_MAX_SIZE_RAW_PACKET]; + } raw; + }; }; VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs); @@ -1405,6 +1425,86 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +struct virtchnl_shaper_bw { + /* Unit is Kbps */ + u32 committed; + u32 peak; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_shaper_bw); + +/* VIRTCHNL_OP_GET_QOS_CAPS + * VF sends this message to get its QoS Caps, such as + * TC number, Arbiter and Bandwidth. + */ +struct virtchnl_qos_cap_elem { + u8 tc_num; + u8 tc_prio; +#define VIRTCHNL_ABITER_STRICT 0 +#define VIRTCHNL_ABITER_ETS 2 + u8 arbiter; +#define VIRTCHNL_STRICT_WEIGHT 1 + u8 weight; + enum virtchnl_bw_limit_type type; + union { + struct virtchnl_shaper_bw shaper; + u8 pad2[32]; + }; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(40, virtchnl_qos_cap_elem); + +struct virtchnl_qos_cap_list { + u16 vsi_id; + u16 num_elem; + struct virtchnl_qos_cap_elem cap[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_qos_cap_list); +#define virtchnl_qos_cap_list_LEGACY_SIZEOF 44 + +/* VIRTCHNL_OP_CONFIG_QUEUE_BW */ +struct virtchnl_queue_bw { + u16 queue_id; + u8 tc; + u8 pad; + struct virtchnl_shaper_bw shaper; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_queue_bw); + +struct virtchnl_queues_bw_cfg { + u16 vsi_id; + u16 num_queues; + struct virtchnl_queue_bw cfg[]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(4, virtchnl_queues_bw_cfg); +#define virtchnl_queues_bw_cfg_LEGACY_SIZEOF 16 + +enum virtchnl_queue_type { + VIRTCHNL_QUEUE_TYPE_TX = 0, + VIRTCHNL_QUEUE_TYPE_RX = 1, +}; + +/* structure to specify a chunk of contiguous queues */ +struct virtchnl_queue_chunk { + /* see enum virtchnl_queue_type */ + s32 type; + u16 start_queue_id; + u16 num_queues; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(8, virtchnl_queue_chunk); + +struct virtchnl_quanta_cfg { + u16 quanta_size; + u16 pad; + struct virtchnl_queue_chunk queue_select; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_quanta_cfg); + #define __vss_byone(p, member, count, old) \ (struct_size(p, member, count) + (old - 1 - struct_size(p, member, 0))) @@ -1427,6 +1527,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); __vss(virtchnl_vlan_filter_list_v2, __vss_byelem, p, m, c), \ __vss(virtchnl_tc_info, __vss_byelem, p, m, c), \ __vss(virtchnl_rdma_qvlist_info, __vss_byelem, p, m, c), \ + __vss(virtchnl_qos_cap_list, __vss_byelem, p, m, c), \ + __vss(virtchnl_queues_bw_cfg, __vss_byelem, p, m, c), \ __vss(virtchnl_rss_key, __vss_byone, p, m, c), \ __vss(virtchnl_rss_lut, __vss_byone, p, m, c)) @@ -1626,6 +1728,35 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2: valid_len = sizeof(struct virtchnl_vlan_setting); break; + case VIRTCHNL_OP_GET_QOS_CAPS: + break; + case VIRTCHNL_OP_CONFIG_QUEUE_BW: + valid_len = virtchnl_queues_bw_cfg_LEGACY_SIZEOF; + if (msglen >= valid_len) { + struct virtchnl_queues_bw_cfg *q_bw = + (struct virtchnl_queues_bw_cfg *)msg; + + valid_len = virtchnl_struct_size(q_bw, cfg, + q_bw->num_queues); + if (q_bw->num_queues == 0) { + err_msg_format = true; + break; + } + } + break; + case VIRTCHNL_OP_CONFIG_QUANTA: + valid_len = sizeof(struct virtchnl_quanta_cfg); + if (msglen >= valid_len) { + struct virtchnl_quanta_cfg *q_quanta = + (struct virtchnl_quanta_cfg *)msg; + + if (q_quanta->quanta_size == 0 || + q_quanta->queue_select.num_queues == 0) { + err_msg_format = true; + break; + } + } + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h index 4b61b0e57720..1476a6ed1bfd 100644 --- a/include/linux/backing-file.h +++ b/include/linux/backing-file.h @@ -14,9 +14,8 @@ struct backing_file_ctx { const struct cred *cred; - struct file *user_file; - void (*accessed)(struct file *); - void (*end_write)(struct file *); + void (*accessed)(struct file *file); + void (*end_write)(struct kiocb *iocb, ssize_t); }; struct file *backing_file_open(const struct path *user_path, int flags, @@ -31,13 +30,13 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, struct kiocb *iocb, int flags, struct backing_file_ctx *ctx); -ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, +ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb, struct pipe_inode_info *pipe, size_t len, unsigned int flags, struct backing_file_ctx *ctx); ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, loff_t *ppos, size_t len, - unsigned int flags, + struct file *out, struct kiocb *iocb, + size_t len, unsigned int flags, struct backing_file_ctx *ctx); int backing_file_mmap(struct file *file, struct vm_area_struct *vma, struct backing_file_ctx *ctx); diff --git a/include/linux/backlight.h b/include/linux/backlight.h index ea9c1bc8148e..f5652e5a9060 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -66,24 +66,6 @@ enum backlight_type { BACKLIGHT_TYPE_MAX, }; -/** - * enum backlight_notification - the type of notification - * - * The notifications that is used for notification sent to the receiver - * that registered notifications using backlight_register_notifier(). - */ -enum backlight_notification { - /** - * @BACKLIGHT_REGISTERED: The backlight device is registered. - */ - BACKLIGHT_REGISTERED, - - /** - * @BACKLIGHT_UNREGISTERED: The backlight revice is unregistered. - */ - BACKLIGHT_UNREGISTERED, -}; - /** enum backlight_scale - the type of scale used for brightness values * * The type of scale used for brightness values. @@ -421,8 +403,6 @@ void devm_backlight_device_unregister(struct device *dev, struct backlight_device *bd); void backlight_force_update(struct backlight_device *bd, enum backlight_update_reason reason); -int backlight_register_notifier(struct notifier_block *nb); -int backlight_unregister_notifier(struct notifier_block *nb); struct backlight_device *backlight_device_get_by_name(const char *name); struct backlight_device *backlight_device_get_by_type(enum backlight_type type); int backlight_device_set_brightness(struct backlight_device *bd, diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 68da8dba5162..dba41b65ae0d 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -203,7 +203,7 @@ struct pci_dev; #define BCMA_CORE_PCI_MDIO_RXCTRL0 0x840 /* PCIE Root Capability Register bits (Host mode only) */ -#define BCMA_CORE_PCI_RC_CRS_VISIBILITY 0x0001 +#define BCMA_CORE_PCI_RC_RRS_VISIBILITY 0x0001 struct bcma_drv_pci; struct bcma_bus; diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index dd831c269e99..dbf0f74c1529 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -72,7 +72,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len); void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); @@ -99,7 +99,7 @@ static inline void bioset_integrity_free(struct bio_set *bs) } static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, - ssize_t len, u32 seed) + ssize_t len) { return -EINVAL; } diff --git a/include/linux/bio.h b/include/linux/bio.h index a46e2047bea4..7a1b3b1a8fed 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -324,8 +324,8 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); -struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, - unsigned *segs, struct bio_set *bs, unsigned max_bytes); +int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, + unsigned *segs, unsigned max_bytes); /** * bio_next_split - get next @sectors from a bio, splitting if necessary @@ -418,14 +418,12 @@ bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); -int bio_add_zone_append_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int offset); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter); +void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); @@ -677,6 +675,23 @@ static inline void bio_clear_polled(struct bio *bio) bio->bi_opf &= ~REQ_POLLED; } +/** + * bio_is_zone_append - is this a zone append bio? + * @bio: bio to check + * + * Check if @bio is a zone append operation. Core block layer code and end_io + * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check + * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if + * it is not natively supported. + */ +static inline bool bio_is_zone_append(struct bio *bio) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) + return false; + return bio_op(bio) == REQ_OP_ZONE_APPEND || + bio_flagged(bio, BIO_EMULATES_ZONE_APPEND); +} + struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index d3b66d77df7a..262b6596eca5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -203,12 +203,12 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map, * the bit offset of all zero areas this function finds is multiples of that * power of 2. A @align_mask of 0 means no alignment is required. */ -static inline unsigned long -bitmap_find_next_zero_area(unsigned long *map, - unsigned long size, - unsigned long start, - unsigned int nr, - unsigned long align_mask) +static __always_inline +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask) { return bitmap_find_next_zero_area_off(map, size, start, nr, align_mask, 0); @@ -228,7 +228,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, #define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) -static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -238,7 +238,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) memset(dst, 0, len); } -static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) +static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -248,8 +248,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) memset(dst, 0xff, len); } -static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int len = bitmap_size(nbits); @@ -262,8 +262,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, /* * Copy bitmap and clear tail bits in last word. */ -static inline void bitmap_copy_clear_tail(unsigned long *dst, - const unsigned long *src, unsigned int nbits) +static __always_inline +void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits) { bitmap_copy(dst, src, nbits); if (nbits % BITS_PER_LONG) @@ -318,16 +318,18 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits); bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits)) #endif -static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_and(dst, src1, src2, nbits); } -static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -335,8 +337,9 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } -static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +void bitmap_xor(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 ^ *src2; @@ -344,16 +347,17 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; return __bitmap_andnot(dst, src1, src2, nbits); } -static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, - unsigned int nbits) +static __always_inline +void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = ~(*src); @@ -368,8 +372,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -static inline bool bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -388,10 +392,9 @@ static inline bool bitmap_equal(const unsigned long *src1, * * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise */ -static inline bool bitmap_or_equal(const unsigned long *src1, - const unsigned long *src2, - const unsigned long *src3, - unsigned int nbits) +static __always_inline +bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, + const unsigned long *src3, unsigned int nbits) { if (!small_const_nbits(nbits)) return __bitmap_or_equal(src1, src2, src3, nbits); @@ -399,9 +402,8 @@ static inline bool bitmap_or_equal(const unsigned long *src1, return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } -static inline bool bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, - unsigned int nbits) +static __always_inline +bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; @@ -409,8 +411,8 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -static inline bool bitmap_subset(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); @@ -418,7 +420,8 @@ static inline bool bitmap_subset(const unsigned long *src1, return __bitmap_subset(src1, src2, nbits); } -static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) +static __always_inline +bool bitmap_empty(const unsigned long *src, unsigned nbits) { if (small_const_nbits(nbits)) return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -426,7 +429,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits) return find_first_bit(src, nbits) == nbits; } -static inline bool bitmap_full(const unsigned long *src, unsigned int nbits) +static __always_inline +bool bitmap_full(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); @@ -460,8 +464,8 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1, return __bitmap_weight_andnot(src1, src2, nbits); } -static __always_inline void bitmap_set(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __set_bit(start, map); @@ -476,8 +480,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start, __bitmap_set(map, start, nbits); } -static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, - unsigned int nbits) +static __always_inline +void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) __clear_bit(start, map); @@ -492,8 +496,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, __bitmap_clear(map, start, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; @@ -501,8 +506,9 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, - unsigned int shift, unsigned int nbits) +static __always_inline +void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); @@ -510,11 +516,12 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr __bitmap_shift_left(dst, src, shift, nbits); } -static inline void bitmap_replace(unsigned long *dst, - const unsigned long *old, - const unsigned long *new, - const unsigned long *mask, - unsigned int nbits) +static __always_inline +void bitmap_replace(unsigned long *dst, + const unsigned long *old, + const unsigned long *new, + const unsigned long *mask, + unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*old & ~(*mask)) | (*new & *mask); @@ -557,8 +564,9 @@ static inline void bitmap_replace(unsigned long *dst, * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. * See bitmap_scatter() for details related to this relationship. */ -static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -611,8 +619,9 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, * bitmap_scatter(res, src, mask, n) and a call to * bitmap_scatter(res, result, mask, n) will lead to the same res value. */ -static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, - const unsigned long *mask, unsigned int nbits) +static __always_inline +void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) { unsigned int n = 0; unsigned int bit; @@ -623,9 +632,9 @@ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, __assign_bit(n++, dst, test_bit(bit, src)); } -static inline void bitmap_next_set_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) +static __always_inline +void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, + unsigned int *re, unsigned int end) { *rs = find_next_bit(bitmap, end, *rs); *re = find_next_zero_bit(bitmap, end, *rs + 1); @@ -640,7 +649,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, * This is the complement to __bitmap_find_free_region() and releases * the found region (by clearing it in the bitmap). */ -static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order) { bitmap_clear(bitmap, pos, BIT(order)); } @@ -656,7 +666,8 @@ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos * Returns: 0 on success, or %-EBUSY if specified region wasn't * free (not all bits were zero). */ -static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) +static __always_inline +int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) { unsigned int len = BIT(order); @@ -680,7 +691,8 @@ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos * Returns: the bit offset in bitmap of the allocated region, * or -errno on failure. */ -static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) +static __always_inline +int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order) { unsigned int pos, end; /* scans bitmap by regions of size order */ @@ -734,7 +746,7 @@ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bi * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ -static inline void bitmap_from_u64(unsigned long *dst, u64 mask) +static __always_inline void bitmap_from_u64(unsigned long *dst, u64 mask) { bitmap_from_arr64(dst, &mask, 64); } @@ -749,9 +761,8 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) * @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return * value is undefined. */ -static inline unsigned long bitmap_read(const unsigned long *map, - unsigned long start, - unsigned long nbits) +static __always_inline +unsigned long bitmap_read(const unsigned long *map, unsigned long start, unsigned long nbits) { size_t index = BIT_WORD(start); unsigned long offset = start % BITS_PER_LONG; @@ -784,8 +795,9 @@ static inline unsigned long bitmap_read(const unsigned long *map, * * For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed. */ -static inline void bitmap_write(unsigned long *map, unsigned long value, - unsigned long start, unsigned long nbits) +static __always_inline +void bitmap_write(unsigned long *map, unsigned long value, + unsigned long start, unsigned long nbits) { size_t index; unsigned long offset; diff --git a/include/linux/bits.h b/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. + */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index de98049b7ded..c7eae0bfb013 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -25,9 +25,10 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, } #ifdef CONFIG_BLK_DEV_INTEGRITY -int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, - struct scatterlist *); +int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf, + ssize_t bytes); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) @@ -96,12 +97,17 @@ static inline int blk_rq_count_integrity_sg(struct request_queue *q, { return 0; } -static inline int blk_rq_map_integrity_sg(struct request_queue *q, - struct bio *b, +static inline int blk_rq_map_integrity_sg(struct request *q, struct scatterlist *s) { return 0; } +static inline int blk_rq_integrity_map_user(struct request *rq, + void __user *ubuf, + ssize_t bytes) +{ + return -EINVAL; +} static inline struct blk_integrity *bdev_get_integrity(struct block_device *b) { return NULL; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8d304b1d16b1..c596e0e4cb75 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -149,19 +149,13 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; - -#ifdef CONFIG_BLK_DEV_INTEGRITY unsigned short nr_integrity_segments; -#endif #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct bio_crypt_ctx *crypt_ctx; struct blk_crypto_keyslot *crypt_keyslot; #endif - enum rw_hint write_hint; - unsigned short ioprio; - enum mq_rq_state state; atomic_t ref; @@ -225,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq) static inline unsigned short req_get_ioprio(struct request *req) { - return req->ioprio; + if (req->bio) + return req->bio->bi_ioprio; + return 0; } #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) @@ -233,62 +229,61 @@ static inline unsigned short req_get_ioprio(struct request *req) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) -#define rq_list_add(listptr, rq) do { \ - (rq)->rq_next = *(listptr); \ - *(listptr) = rq; \ -} while (0) - -#define rq_list_add_tail(lastpptr, rq) do { \ - (rq)->rq_next = NULL; \ - **(lastpptr) = rq; \ - *(lastpptr) = &rq->rq_next; \ -} while (0) - -#define rq_list_pop(listptr) \ -({ \ - struct request *__req = NULL; \ - if ((listptr) && *(listptr)) { \ - __req = *(listptr); \ - *(listptr) = __req->rq_next; \ - } \ - __req; \ -}) +static inline int rq_list_empty(const struct rq_list *rl) +{ + return rl->head == NULL; +} -#define rq_list_peek(listptr) \ -({ \ - struct request *__req = NULL; \ - if ((listptr) && *(listptr)) \ - __req = *(listptr); \ - __req; \ -}) +static inline void rq_list_init(struct rq_list *rl) +{ + rl->head = NULL; + rl->tail = NULL; +} -#define rq_list_for_each(listptr, pos) \ - for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) +static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq) +{ + rq->rq_next = NULL; + if (rl->tail) + rl->tail->rq_next = rq; + else + rl->head = rq; + rl->tail = rq; +} -#define rq_list_for_each_safe(listptr, pos, nxt) \ - for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \ - pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL) +static inline void rq_list_add_head(struct rq_list *rl, struct request *rq) +{ + rq->rq_next = rl->head; + rl->head = rq; + if (!rl->tail) + rl->tail = rq; +} -#define rq_list_next(rq) (rq)->rq_next -#define rq_list_empty(list) ((list) == (struct request *) NULL) +static inline struct request *rq_list_pop(struct rq_list *rl) +{ + struct request *rq = rl->head; -/** - * rq_list_move() - move a struct request from one list to another - * @src: The source list @rq is currently in - * @dst: The destination list that @rq will be appended to - * @rq: The request to move - * @prev: The request preceding @rq in @src (NULL if @rq is the head) - */ -static inline void rq_list_move(struct request **src, struct request **dst, - struct request *rq, struct request *prev) + if (rq) { + rl->head = rl->head->rq_next; + if (!rl->head) + rl->tail = NULL; + rq->rq_next = NULL; + } + + return rq; +} + +static inline struct request *rq_list_peek(struct rq_list *rl) { - if (prev) - prev->rq_next = rq->rq_next; - else - *src = rq->rq_next; - rq_list_add(dst, rq); + return rl->head; } +#define rq_list_for_each(rl, pos) \ + for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next) + +#define rq_list_for_each_safe(rl, pos, nxt) \ + for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \ + pos; pos = nxt, nxt = pos ? pos->rq_next : NULL) + /** * enum blk_eh_timer_return - How the timeout handler should proceed * @BLK_EH_DONE: The block driver completed the command or will complete it at @@ -580,7 +575,7 @@ struct blk_mq_ops { * empty the @rqlist completely, then the rest will be queued * individually by the block layer upon return. */ - void (*queue_rqs)(struct request **rqlist); + void (*queue_rqs)(struct rq_list *rqlist); /** * @get_budget: Reserve budget before queue request, once .queue_rq is @@ -860,12 +855,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - /* - * passthrough io doesn't use iostat accounting, cgroup stats - * and io scheduler functionalities. - */ - if (blk_rq_is_passthrough(rq)) - return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } @@ -895,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, else if (iob->complete != complete) return false; iob->need_ts |= blk_mq_need_time_stamp(req); - rq_list_add(&iob->req_list, req); + rq_list_add_tail(&iob->req_list, req); return true; } @@ -928,6 +917,8 @@ void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); +void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); +void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); @@ -992,7 +983,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, rq->nr_phys_segments = nr_segs; rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; - rq->ioprio = bio_prio(bio); } void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 36ed96133217..dce7615c35e7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -71,6 +71,9 @@ struct block_device { struct partition_meta_info *bd_meta_info; int bd_writers; +#ifdef CONFIG_SECURITY + void *bd_security; +#endif /* * keep this out-of-line as it's both big and not needed in the fast * path @@ -248,11 +251,9 @@ struct bio { struct bio_crypt_ctx *bi_crypt_context; #endif - union { #if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload *bi_integrity; /* data integrity */ + struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif - }; unsigned short bi_vcnt; /* how many bio_vec's */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b7664d593486..378d3a1a22fc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -25,6 +25,7 @@ #include <linux/uuid.h> #include <linux/xarray.h> #include <linux/file.h> +#include <linux/lockdep.h> struct module; struct request_queue; @@ -194,13 +195,11 @@ struct gendisk { unsigned int nr_zones; unsigned int zone_capacity; unsigned int last_zone_capacity; - unsigned long *conv_zones_bitmap; + unsigned long __rcu *conv_zones_bitmap; unsigned int zone_wplugs_hash_bits; spinlock_t zone_wplugs_lock; struct mempool_s *zone_wplugs_pool; struct hlist_head *zone_wplugs_hash; - struct list_head zone_wplugs_err_list; - struct work_struct zone_wplugs_work; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -332,6 +331,10 @@ typedef unsigned int __bitwise blk_features_t; #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) +/* stacked device can/does support atomic writes */ +#define BLK_FEAT_ATOMIC_WRITES_STACKED \ + ((__force blk_features_t)(1u << 16)) + /* * Flags automatically inherited when stacking limits. */ @@ -349,6 +352,9 @@ typedef unsigned int __bitwise blk_flags_t; /* I/O topology is misaligned */ #define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) +/* passthrough command IO accounting */ +#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2)) + struct queue_limits { blk_features_t features; blk_flags_t flags; @@ -371,6 +377,7 @@ struct queue_limits { unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; + unsigned int max_hw_zone_append_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -471,6 +478,11 @@ struct request_queue { struct xarray hctx_table; struct percpu_ref q_usage_counter; + struct lock_class_key io_lock_cls_key; + struct lockdep_map io_lockdep_map; + + struct lock_class_key q_lock_cls_key; + struct lockdep_map q_lockdep_map; struct request *last_merge; @@ -566,6 +578,10 @@ struct request_queue { struct throtl_data *td; #endif struct rcu_head rcu_head; +#ifdef CONFIG_LOCKDEP + struct task_struct *mq_freeze_owner; + int mq_freeze_owner_depth; +#endif wait_queue_head_t mq_freeze_wq; /* * Protect concurrent access to q_usage_counter by @@ -617,6 +633,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) +#define blk_queue_passthrough_stat(q) \ + ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME @@ -725,6 +743,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode); int __must_check device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); static inline int __must_check add_disk(struct gendisk *disk) @@ -756,13 +777,13 @@ static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag) atomic_andnot(flag, &bdev->__bd_flags); } -static inline int get_disk_ro(struct gendisk *disk) +static inline bool get_disk_ro(struct gendisk *disk) { return bdev_test_flag(disk->part0, BD_READ_ONLY) || test_bit(GD_READ_ONLY, &disk->state); } -static inline int bdev_read_only(struct block_device *bdev) +static inline bool bdev_read_only(struct block_device *bdev) { return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk); } @@ -929,6 +950,7 @@ queue_limits_start_update(struct request_queue *q) int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim); int queue_limits_set(struct request_queue *q, struct queue_limits *lim); +int blk_validate_limits(struct queue_limits *lim); /** * queue_limits_cancel_update - cancel an atomic update of queue limits @@ -968,8 +990,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q) /* * Access functions for manipulating queue properties */ -extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); -extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, @@ -988,6 +1008,11 @@ extern void blk_put_queue(struct request_queue *); void blk_mark_disk_dead(struct gendisk *disk); +struct rq_list { + struct request *head; + struct request *tail; +}; + #ifdef CONFIG_BLOCK /* * blk_plug permits building a queue of related requests by holding the I/O @@ -1001,10 +1026,10 @@ void blk_mark_disk_dead(struct gendisk *disk); * blk_flush_plug() is called. */ struct blk_plug { - struct request *mq_list; /* blk-mq requests */ + struct rq_list mq_list; /* blk-mq requests */ /* if ios_left is > 1, we can batch tag/rq allocations */ - struct request *cached_rq; + struct rq_list cached_rqs; u64 cur_ktime; unsigned short nr_ios; @@ -1147,6 +1172,11 @@ enum blk_default_limits { */ #define BLK_DEF_MAX_SECTORS_CAP 2560u +static inline struct queue_limits *bdev_limits(struct block_device *bdev) +{ + return &bdev_get_queue(bdev)->limits; +} + static inline unsigned long queue_segment_boundary(const struct request_queue *q) { return q->limits.seg_boundary_mask; @@ -1187,24 +1217,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l) -{ - unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); - - return min_not_zero(l->max_zone_append_sectors, max_sectors); -} - -static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q) -{ - if (!blk_queue_is_zoned(q)) - return 0; - - return queue_limits_max_zone_append_sectors(&q->limits); -} - static inline bool queue_emulates_zone_append(struct request_queue *q) { - return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors; + return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors; } static inline bool bdev_emulates_zone_append(struct block_device *bdev) @@ -1215,7 +1230,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev) static inline unsigned int bdev_max_zone_append_sectors(struct block_device *bdev) { - return queue_max_zone_append_sectors(bdev_get_queue(bdev)); + return bdev_limits(bdev)->max_zone_append_sectors; } static inline unsigned int bdev_max_segments(struct block_device *bdev) @@ -1248,7 +1263,7 @@ static inline unsigned int queue_io_min(const struct request_queue *q) return q->limits.io_min; } -static inline int bdev_io_min(struct block_device *bdev) +static inline unsigned int bdev_io_min(struct block_device *bdev) { return queue_io_min(bdev_get_queue(bdev)); } @@ -1258,7 +1273,7 @@ static inline unsigned int queue_io_opt(const struct request_queue *q) return q->limits.io_opt; } -static inline int bdev_io_opt(struct block_device *bdev) +static inline unsigned int bdev_io_opt(struct block_device *bdev) { return queue_io_opt(bdev_get_queue(bdev)); } @@ -1280,23 +1295,23 @@ unsigned int bdev_discard_alignment(struct block_device *bdev); static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.max_discard_sectors; + return bdev_limits(bdev)->max_discard_sectors; } static inline unsigned int bdev_discard_granularity(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.discard_granularity; + return bdev_limits(bdev)->discard_granularity; } static inline unsigned int bdev_max_secure_erase_sectors(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.max_secure_erase_sectors; + return bdev_limits(bdev)->max_secure_erase_sectors; } static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; + return bdev_limits(bdev)->max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) @@ -1332,7 +1347,7 @@ static inline bool bdev_write_cache(struct block_device *bdev) static inline bool bdev_fua(struct block_device *bdev) { - return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA; + return bdev_limits(bdev)->features & BLK_FEAT_FUA; } static inline bool bdev_nowait(struct block_device *bdev) @@ -1377,7 +1392,37 @@ static inline bool bdev_is_zone_start(struct block_device *bdev, return bdev_offset_from_zone_start(bdev, sector) == 0; } -static inline int queue_dma_alignment(const struct request_queue *q) +/** + * bdev_zone_is_seq - check if a sector belongs to a sequential write zone + * @bdev: block device to check + * @sector: sector number + * + * Check if @sector on @bdev is contained in a sequential write required zone. + */ +static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector) +{ + bool is_seq = false; + +#if IS_ENABLED(CONFIG_BLK_DEV_ZONED) + if (bdev_is_zoned(bdev)) { + struct gendisk *disk = bdev->bd_disk; + unsigned long *bitmap; + + rcu_read_lock(); + bitmap = rcu_dereference(disk->conv_zones_bitmap); + is_seq = !bitmap || + !test_bit(disk_zone_no(disk, sector), bitmap); + rcu_read_unlock(); + } +#endif + + return is_seq; +} + +int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask); + +static inline unsigned int queue_dma_alignment(const struct request_queue *q) { return q->limits.dma_alignment; } @@ -1418,12 +1463,13 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev, bdev_logical_block_size(bdev) - 1); } -static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) +static inline unsigned int +blk_lim_dma_alignment_and_pad(struct queue_limits *lim) { return lim->dma_alignment | lim->dma_pad_mask; } -static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, +static inline bool blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits); @@ -1541,7 +1587,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } -int bdev_read_only(struct block_device *bdev); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1649,7 +1694,7 @@ int bdev_thaw(struct block_device *bdev); void bdev_fput(struct file *bdev_file); struct io_comp_batch { - struct request *req_list; + struct rq_list req_list; bool need_ts; void (*complete)(struct io_comp_batch *); }; @@ -1675,6 +1720,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev) return true; } +static inline unsigned int +bdev_atomic_write_unit_min_bytes(struct block_device *bdev) +{ + if (!bdev_can_atomic_write(bdev)) + return 0; + return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev)); +} + +static inline unsigned int +bdev_atomic_write_unit_max_bytes(struct block_device *bdev) +{ + if (!bdev_can_atomic_write(bdev)) + return 0; + return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev)); +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index cffa38a73618..d8a8d245824a 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -6,11 +6,10 @@ #include <linux/kmemleak.h> /* - * Types for free bootmem stored in page->lru.next. These have to be in - * some random range in unsigned long space for debugging purposes. + * Types for free bootmem stored in the low bits of page->private. */ -enum { - MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, +enum bootmem_type { + MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 1, SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, MIX_SECTION_INFO, NODE_INFO, @@ -21,9 +20,19 @@ enum { void __init register_page_bootmem_info_node(struct pglist_data *pgdat); void get_page_bootmem(unsigned long info, struct page *page, - unsigned long type); + enum bootmem_type type); void put_page_bootmem(struct page *page); +static inline enum bootmem_type bootmem_type(const struct page *page) +{ + return (unsigned long)page->private & 0xf; +} + +static inline unsigned long bootmem_info(const struct page *page) +{ + return (unsigned long)page->private >> 4; +} + /* * Any memory allocated via the memblock allocator and not via the * buddy will be marked reserved already in the memmap. For those @@ -31,7 +40,7 @@ void put_page_bootmem(struct page *page); */ static inline void free_bootmem_page(struct page *page) { - unsigned long magic = page->index; + enum bootmem_type type = bootmem_type(page); /* * The reserve_bootmem_region sets the reserved flag on bootmem @@ -39,7 +48,7 @@ static inline void free_bootmem_page(struct page *page) */ VM_BUG_ON_PAGE(page_ref_count(page) != 2, page); - if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) + if (type == SECTION_INFO || type == MIX_SECTION_INFO) put_page_bootmem(page); else VM_BUG_ON_PAGE(1, page); @@ -53,8 +62,18 @@ static inline void put_page_bootmem(struct page *page) { } +static inline enum bootmem_type bootmem_type(const struct page *page) +{ + return SECTION_INFO; +} + +static inline unsigned long bootmem_info(const struct page *page) +{ + return 0; +} + static inline void get_page_bootmem(unsigned long info, struct page *page, - unsigned long type) + enum bootmem_type type) { } diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index fb3c3e7181e6..7fc69083e745 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,7 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, - struct ctl_table *table, int write, + const struct ctl_table *table, int write, char **buf, size_t *pcount, loff_t *ppos, enum cgroup_bpf_attach_type atype); @@ -209,7 +209,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk) { \ typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb) && \ + if (__sk && __sk == skb_to_full_sk(skb) && \ cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS)) \ __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ CGROUP_INET_EGRESS); \ @@ -390,14 +390,6 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - copy_from_sockptr(&__ret, optlen, sizeof(int)); \ - __ret; \ -}) - #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ max_optlen, retval) \ ({ \ @@ -518,7 +510,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3b94ec161e8c..6e63dd3443b9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -203,6 +203,7 @@ enum btf_field_type { BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), BPF_WORKQUEUE = (1 << 10), + BPF_UPTR = (1 << 11), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -294,6 +295,7 @@ struct bpf_map { * same prog type, JITed flag and xdp_has_frags flag. */ struct { + const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; @@ -321,6 +323,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "kptr"; case BPF_KPTR_PERCPU: return "percpu_kptr"; + case BPF_UPTR: + return "uptr"; case BPF_LIST_HEAD: return "bpf_list_head"; case BPF_LIST_NODE: @@ -349,6 +353,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return sizeof(u64); case BPF_LIST_HEAD: return sizeof(struct bpf_list_head); @@ -378,6 +383,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: return __alignof__(u64); case BPF_LIST_HEAD: return __alignof__(struct bpf_list_head); @@ -418,6 +424,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: break; default: WARN_ON_ONCE(1); @@ -506,6 +513,25 @@ static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src bpf_obj_memcpy(map->record, dst, src, map->value_size, true); } +static inline void bpf_obj_swap_uptrs(const struct btf_record *rec, void *dst, void *src) +{ + unsigned long *src_uptr, *dst_uptr; + const struct btf_field *field; + int i; + + if (!btf_record_has_field(rec, BPF_UPTR)) + return; + + for (i = 0, field = rec->fields; i < rec->cnt; i++, field++) { + if (field->type != BPF_UPTR) + continue; + + src_uptr = src + field->offset; + dst_uptr = dst + field->offset; + swap(*src_uptr, *dst_uptr); + } +} + static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size) { u32 curr_off = 0; @@ -634,6 +660,7 @@ enum bpf_type_flag { */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), + /* MEM can be uninitialized. */ MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), /* DYNPTR points to memory local to the bpf program. */ @@ -694,6 +721,18 @@ enum bpf_type_flag { /* DYNPTR points to xdp_buff */ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS), + /* Memory must be aligned on some architectures, used in combination with + * MEM_FIXED_SIZE. + */ + MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS), + + /* MEM is being written to, often combined with MEM_UNINIT. Non-presence + * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the + * MEM_UNINIT means that memory needs to be initialized since it is also + * read. + */ + MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -731,8 +770,6 @@ enum bpf_arg_type { ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ - ARG_PTR_TO_INT, /* pointer to int */ - ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ @@ -743,7 +780,7 @@ enum bpf_arg_type { ARG_PTR_TO_STACK, /* pointer to stack */ ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ - ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_KPTR_XCHG_DEST, /* pointer to destination that kptrs are bpf_kptr_xchg'd into */ ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, @@ -754,10 +791,10 @@ enum bpf_arg_type { ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, - /* pointer to memory does not need to be initialized, helper function must fill - * all bytes or clear them in error case. + /* Pointer to memory does not need to be initialized, since helper function + * fills all bytes or clears them in error case. */ - ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM, /* Pointer to valid memory of size known at compile time. */ ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM, @@ -807,6 +844,12 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; bool might_sleep; + /* set to true if helper follows contract for llvm + * attribute bpf_fastcall: + * - void functions do not scratch r0 + * - functions taking N arguments scratch only registers r1-rN + */ + bool allow_fastcall; enum bpf_return_type ret_type; union { struct { @@ -889,10 +932,6 @@ enum bpf_reg_type { * additional context, assume the value is non-null. */ PTR_TO_BTF_ID, - /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not - * been checked for null. Used primarily to inform the verifier - * an explicit null check is required for this struct. - */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ @@ -905,6 +944,10 @@ enum bpf_reg_type { PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, + /* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not + * been checked for null. Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -919,6 +962,7 @@ static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; + bool is_ldsx; union { int ctx_field_size; struct { @@ -927,6 +971,7 @@ struct bpf_insn_access_aux { }; }; struct bpf_verifier_log *log; /* for verbose logs */ + bool is_retval; /* is accessing function return value ? */ }; static inline void @@ -965,6 +1010,8 @@ struct bpf_verifier_ops { struct bpf_insn_access_aux *info); int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, const struct bpf_prog *prog); + int (*gen_epilogue)(struct bpf_insn *insn, const struct bpf_prog *prog, + s16 ctx_stack_off); int (*gen_ld_abs)(const struct bpf_insn *orig, struct bpf_insn *insn_buf); u32 (*convert_ctx_access)(enum bpf_access_type type, @@ -1278,8 +1325,12 @@ void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -1351,7 +1402,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); /* Called only from JIT-enabled code, so there's no need for stubs. */ -void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym); +void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); @@ -1360,12 +1412,14 @@ void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } @@ -1454,6 +1508,7 @@ struct bpf_prog_aux { u32 max_rdwr_access; struct btf *attach_btf; const struct bpf_ctx_arg_aux *ctx_arg_info; + void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; struct bpf_trampoline *dst_trampoline; @@ -1469,7 +1524,14 @@ struct bpf_prog_aux { bool xdp_has_frags; bool exception_cb; bool exception_boundary; + bool is_extended; /* true if extended by freplace program */ + bool jits_use_priv_stack; + bool priv_stack_requested; + bool changes_pkt_data; + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; + void (*recursion_detected)(struct bpf_prog *prog); /* callback if recursion is detected */ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1584,6 +1646,11 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; + /* whether BPF link itself has "sleepable" semantics, which can differ + * from underlying BPF program having a "sleepable" semantics, as BPF + * link's semantics is determined by target attach hook + */ + bool sleepable; /* rcu is used before freeing, work can be used to schedule that * RCU-based freeing before that, so they never overlap */ @@ -1600,8 +1667,10 @@ struct bpf_link_ops { */ void (*dealloc)(struct bpf_link *link); /* deallocate link resources callback, called after RCU grace period; - * if underlying BPF program is sleepable we go through tasks trace - * RCU GP and then "classic" RCU GP + * if either the underlying BPF program is sleepable or BPF link's + * target hook is sleepable, we'll go through tasks trace RCU GP and + * then "classic" RCU GP; this need for chaining tasks trace and + * classic RCU GPs is designated by setting bpf_link->sleepable flag */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); @@ -1795,6 +1864,7 @@ struct bpf_struct_ops_common_value { #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); +int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, @@ -1851,6 +1921,10 @@ static inline void bpf_module_put(const void *data, struct module *owner) { module_put(owner); } +static inline int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff) +{ + return -ENOTSUPP; +} static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value) @@ -2120,26 +2194,25 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. */ static __always_inline u32 -bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, +bpf_prog_run_array_uprobe(const struct bpf_prog_array *array, const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; - const struct bpf_prog_array *array; struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; u32 ret = 1; might_fault(); + RCU_LOCKDEP_WARN(!rcu_read_lock_trace_held(), "no rcu lock held"); + + if (unlikely(!array)) + return ret; - rcu_read_lock_trace(); migrate_disable(); run_ctx.is_uprobe = true; - array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); - if (unlikely(!array)) - goto out; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { @@ -2154,9 +2227,7 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); -out: migrate_enable(); - rcu_read_unlock_trace(); return ret; } @@ -2227,7 +2298,16 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); -struct bpf_map *__bpf_map_get(struct fd f); + +static inline struct bpf_map *__bpf_map_get(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); @@ -2334,6 +2414,9 @@ int bpf_prog_new_fd(struct bpf_prog *prog); void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog, + bool sleepable); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); @@ -2689,6 +2772,12 @@ static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, { } +static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type, + const struct bpf_link_ops *ops, struct bpf_prog *prog, + bool sleepable) +{ +} + static inline int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) { @@ -3184,7 +3273,9 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; extern const struct bpf_func_proto bpf_get_stack_proto; +extern const struct bpf_func_proto bpf_get_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_task_stack_proto; +extern const struct bpf_func_proto bpf_get_task_stack_sleepable_proto; extern const struct bpf_func_proto bpf_get_stackid_proto_pe; extern const struct bpf_func_proto bpf_get_stack_proto_pe; extern const struct bpf_func_proto bpf_sock_map_update_proto; @@ -3192,6 +3283,7 @@ extern const struct bpf_func_proto bpf_sock_hash_update_proto; extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto; extern const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto; +extern const struct bpf_func_proto bpf_current_task_under_cgroup_proto; extern const struct bpf_func_proto bpf_msg_redirect_hash_proto; extern const struct bpf_func_proto bpf_msg_redirect_map_proto; extern const struct bpf_func_proto bpf_sk_redirect_hash_proto; diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index dcddb0aef7d8..ab7244d8108f 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -77,7 +77,13 @@ struct bpf_local_storage_elem { struct hlist_node map_node; /* Linked to bpf_local_storage_map */ struct hlist_node snode; /* Linked to bpf_local_storage */ struct bpf_local_storage __rcu *local_storage; - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct hlist_node free_node; /* used to postpone + * bpf_selem_free + * after raw_spin_unlock + */ + }; /* 8 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. @@ -181,7 +187,7 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, - bool charge_mem, gfp_t gfp_flags); + bool charge_mem, bool swap_uptrs, gfp_t gfp_flags); void bpf_selem_free(struct bpf_local_storage_elem *selem, struct bpf_local_storage_map *smap, @@ -195,7 +201,7 @@ bpf_local_storage_alloc(void *owner, struct bpf_local_storage_data * bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, - void *value, u64 map_flags, gfp_t gfp_flags); + void *value, u64 map_flags, bool swap_uptrs, gfp_t gfp_flags); u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map); diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 1de7ece5d36d..aefcd6564251 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/bpf.h> +#include <linux/bpf_verifier.h> #include <linux/lsm_hooks.h> #ifdef CONFIG_BPF_LSM @@ -45,6 +46,8 @@ void bpf_inode_storage_free(struct inode *inode); void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func); +int bpf_lsm_get_retval_range(const struct bpf_prog *prog, + struct bpf_retval_range *range); #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -78,6 +81,11 @@ static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, { } +static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog, + struct bpf_retval_range *range) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h index aaf004d94322..e45162ef59bb 100644 --- a/include/linux/bpf_mem_alloc.h +++ b/include/linux/bpf_mem_alloc.h @@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size); void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma); +/* Check the allocation size for kmalloc equivalent allocator */ +int bpf_mem_alloc_check_size(bool percpu, size_t size); + /* kmalloc/kfree equivalent: */ void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size); void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9f2a6b83b49e..fa78f49d4a9a 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -146,6 +146,7 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter) BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx) BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit) +BPF_LINK_TYPE(BPF_LINK_TYPE_SOCKMAP, sockmap) #endif #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7b776dae36e5..48b7b2eeb7e2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -23,6 +23,8 @@ * (in the "-8,-16,...,-512" form) */ #define TMP_STR_BUF_LEN 320 +/* Patch buffer size */ +#define INSN_BUF_SIZE 32 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -46,22 +48,6 @@ enum bpf_reg_liveness { REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */ }; -/* For every reg representing a map value or allocated object pointer, - * we consider the tuple of (ptr, id) for them to be unique in verifier - * context and conside them to not alias each other for the purposes of - * tracking lock state. - */ -struct bpf_active_lock { - /* This can either be reg->map_ptr or reg->btf. If ptr is NULL, - * there's no active lock held, and other fields have no - * meaning. If non-NULL, it indicates that a lock is held and - * id member has the reg->id of the register which can be >= 0. - */ - void *ptr; - /* This will be reg->id */ - u32 id; -}; - #define ITER_PREFIX "bpf_iter_" enum bpf_iter_state { @@ -264,6 +250,13 @@ struct bpf_stack_state { }; struct bpf_reference_state { + /* Each reference object has a type. Ensure REF_TYPE_PTR is zero to + * default to pointer reference on zero initialization of a state. + */ + enum ref_state_type { + REF_TYPE_PTR = 0, + REF_TYPE_LOCK, + } type; /* Track each reference created with a unique id, even if the same * instruction creates the reference multiple times (eg, via CALL). */ @@ -272,17 +265,10 @@ struct bpf_reference_state { * is used purely to inform the user of a reference leak. */ int insn_idx; - /* There can be a case like: - * main (frame 0) - * cb (frame 1) - * func (frame 3) - * cb (frame 4) - * Hence for frame 4, if callback_ref just stored boolean, it would be - * impossible to distinguish nested callback refs. Hence store the - * frameno and compare that to callback_ref in check_reference_leak when - * exiting a callback function. - */ - int callback_ref; + /* Use to keep track of the source object of a lock, to ensure + * it matches on unlock. + */ + void *ptr; }; struct bpf_retval_range { @@ -330,6 +316,7 @@ struct bpf_func_state { /* The following fields should be last. See copy_func_state() */ int acquired_refs; + int active_locks; struct bpf_reference_state *refs; /* The state of the stack. Each element of the array describes BPF_REG_SIZE * (i.e. 8) bytes worth of stack memory. @@ -347,7 +334,7 @@ struct bpf_func_state { #define MAX_CALL_FRAMES 8 -/* instruction history flags, used in bpf_jmp_history_entry.flags field */ +/* instruction history flags, used in bpf_insn_hist_entry.flags field */ enum { /* instruction references stack slot through PTR_TO_STACK register; * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8) @@ -365,12 +352,16 @@ enum { static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); -struct bpf_jmp_history_entry { +struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ u32 prev_idx : 22; /* special flags, e.g., whether insn is doing register stack spill/load */ u32 flags : 10; + /* additional registers that need precision tracking when this + * jump is backtracked, vector of six 10-bit records + */ + u64 linked_regs; }; /* Maximum number of register states that can exist at once */ @@ -428,7 +419,6 @@ struct bpf_verifier_state { u32 insn_idx; u32 curframe; - struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; u32 active_preempt_lock; @@ -452,13 +442,14 @@ struct bpf_verifier_state { * See get_loop_entry() for more information. */ struct bpf_verifier_state *loop_entry; - /* jmp history recorded from first to last. - * backtracking is using it to go from last to first. - * For most states jmp_history_cnt is [0-3]. + /* Sub-range of env->insn_hist[] corresponding to this state's + * instruction history. + * Backtracking is using it to go from last to first. + * For most states instruction history is short, 0-3 instructions. * For loops can go up to ~40. */ - struct bpf_jmp_history_entry *jmp_history; - u32 jmp_history_cnt; + u32 insn_hist_start; + u32 insn_hist_end; u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; @@ -572,6 +563,14 @@ struct bpf_insn_aux_data { bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ u8 alu_state; /* used in combination with alu_limit */ + /* true if STX or LDX instruction is a part of a spill/fill + * pattern for a bpf_fastcall call. + */ + u8 fastcall_pattern:1; + /* for CALL instructions, a number of spill/fill pairs in the + * bpf_fastcall pattern. + */ + u8 fastcall_spills_num:3; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -635,12 +634,22 @@ struct bpf_subprog_arg_info { }; }; +enum priv_stack_mode { + PRIV_STACK_UNKNOWN, + NO_PRIV_STACK, + PRIV_STACK_ADAPTIVE, +}; + struct bpf_subprog_info { /* 'start' has to be the first field otherwise find_subprog() won't work */ u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. stack depth used by this function */ u16 stack_extra; + /* offsets in range [stack_depth .. fastcall_stack_off) + * are used for bpf_fastcall spills and fills. + */ + s16 fastcall_stack_off; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -648,7 +657,11 @@ struct bpf_subprog_info { bool is_async_cb: 1; bool is_exception_cb: 1; bool args_cached: 1; + /* true if bpf_fastcall stack region is used by functions that can't be inlined */ + bool keep_fastcall_stack: 1; + bool changes_pkt_data: 1; + enum priv_stack_mode priv_stack_mode; u8 arg_cnt; struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS]; }; @@ -727,7 +740,9 @@ struct bpf_verifier_env { int cur_stack; } cfg; struct backtrack_state bt; - struct bpf_jmp_history_entry *cur_hist_ent; + struct bpf_insn_hist_entry *insn_hist; + struct bpf_insn_hist_entry *cur_hist_ent; + u32 insn_hist_cap; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ @@ -762,6 +777,8 @@ struct bpf_verifier_env { * e.g., in reg_type_str() to generate reg_type string */ char tmp_str_buf[TMP_STR_BUF_LEN]; + struct bpf_insn insn_buf[INSN_BUF_SIZE]; + struct bpf_insn epilogue_buf[INSN_BUF_SIZE]; }; static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog) @@ -866,6 +883,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) case BPF_PROG_TYPE_TRACING: return prog->expected_attach_type != BPF_TRACE_ITER; case BPF_PROG_TYPE_STRUCT_OPS: + return prog->aux->jits_use_priv_stack; case BPF_PROG_TYPE_LSM: return false; default: @@ -905,6 +923,11 @@ static inline bool type_is_sk_pointer(enum bpf_reg_type type) type == PTR_TO_XDP_SOCK; } +static inline bool type_may_be_null(u32 type) +{ + return type & PTR_MAYBE_NULL; +} + static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) { env->scratched_regs |= 1U << regno; diff --git a/include/linux/btf.h b/include/linux/btf.h index cffb43133c68..4214e76c9168 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -75,6 +75,7 @@ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ +#define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the @@ -580,6 +581,17 @@ bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx); + +static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) +{ + if (!btf_type_is_ptr(t)) + return false; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + + return btf_type_is_struct(t); +} #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -654,16 +666,9 @@ static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, { return false; } -#endif - -static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) +static inline int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) { - if (!btf_type_is_ptr(t)) - return false; - - t = btf_type_skip_modifiers(btf, t->type, NULL); - - return btf_type_is_struct(t); + return -EOPNOTSUPP; } - +#endif #endif diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index c0e3e1426a82..139bdececdcf 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -283,5 +283,6 @@ extern u32 btf_tracing_ids[]; extern u32 bpf_cgroup_btf_id[]; extern u32 bpf_local_storage_map_btf_id[]; extern u32 btf_bpf_map_id[]; +extern u32 bpf_kmem_cache_btf_id[]; #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 14acf1bbe0ce..932139c5d46f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -199,8 +199,7 @@ void folio_set_bh(struct buffer_head *bh, struct folio *folio, unsigned long offset); struct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, gfp_t gfp); -struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, - bool retry); +struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size); struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); @@ -258,18 +257,18 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, get_block_t *get_block); -int __block_write_begin(struct page *page, loff_t pos, unsigned len, + struct folio **foliop, get_block_t *get_block); +int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); + loff_t, unsigned len, unsigned copied, + struct folio *, void *); int generic_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); + loff_t, unsigned len, unsigned copied, + struct folio *, void *); void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to); int cont_write_begin(struct file *, struct address_space *, loff_t, - unsigned, struct page **, void **, + unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); void block_commit_write(struct page *page, unsigned int from, unsigned int to); diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 20aa3c2d89f7..014a88c41073 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -7,8 +7,8 @@ #define BUILD_ID_SIZE_MAX 20 struct vm_area_struct; -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, - __u32 *size); +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); +int build_id_parse_nofault(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index ee1d0e5f9789..2d7d86f0290d 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -808,7 +808,7 @@ struct ceph_mds_caps { struct ceph_mds_cap_peer { __le64 cap_id; - __le32 seq; + __le32 issue_seq; __le32 mseq; __le32 mds; __u8 flags; @@ -822,7 +822,7 @@ struct ceph_mds_cap_release { struct ceph_mds_cap_item { __le64 ino; __le64 cap_id; - __le32 migrate_seq, seq; + __le32 migrate_seq, issue_seq; } __attribute__ ((packed)); #define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 04f3ace5787b..8fc1aed64113 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -6,7 +6,7 @@ #include <linux/bug.h> #include <linux/slab.h> #include <linux/time.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/ceph/types.h> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 4497d0a6772c..733e7f93db66 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -4,7 +4,7 @@ #include <linux/ceph/ceph_debug.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/backing-dev.h> #include <linux/completion.h> #include <linux/exportfs.h> @@ -317,12 +317,6 @@ extern void ceph_release_page_vector(struct page **pages, int num_pages); extern void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); -extern int ceph_copy_user_to_page_vector(struct page **pages, - const void __user *data, - loff_t off, size_t len); -extern void ceph_copy_to_page_vector(struct page **pages, - const void *data, - loff_t off, size_t len); extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f66f6aac74f6..d55b30057a45 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -449,8 +449,6 @@ extern int ceph_osdc_init(struct ceph_osd_client *osdc, extern void ceph_osdc_stop(struct ceph_osd_client *osdc); extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); -extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, - struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); @@ -628,8 +626,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, u32 timeout, struct page ***preply_pages, size_t *preply_len); -int ceph_osdc_watch_check(struct ceph_osd_client *osdc, - struct ceph_osd_linger_request *lreq); int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 5dead8486fd8..879bec0863aa 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -17,12 +17,6 @@ struct ceph_pagelist { refcount_t refcnt; }; -struct ceph_pagelist_cursor { - struct ceph_pagelist *pl; /* pagelist, for error checking */ - struct list_head *page_lru; /* page in list */ - size_t room; /* room remaining to reset to */ -}; - struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags); extern void ceph_pagelist_release(struct ceph_pagelist *pl); @@ -33,12 +27,6 @@ extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space); extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl); -extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c); - -extern int ceph_pagelist_truncate(struct ceph_pagelist *pl, - struct ceph_pagelist_cursor *c); - static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v) { __le64 ev = cpu_to_le64(v); diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h index 6617d9c68d86..83e6613d12ae 100644 --- a/include/linux/cfag12864b.h +++ b/include/linux/cfag12864b.h @@ -28,13 +28,6 @@ extern unsigned char * cfag12864b_buffer; /* - * Get the refresh rate of the LCD - * - * Returns the refresh rate (hertz). - */ -extern unsigned int cfag12864b_getrate(void); - -/* * Enable refreshing * * Returns 0 if successful (anyone was using it), @@ -50,16 +43,6 @@ extern unsigned char cfag12864b_enable(void); extern void cfag12864b_disable(void); /* - * Is enabled refreshing? (is anyone using the module?) - * - * Returns 0 if refreshing is not enabled (anyone is using it), - * or != 0 if refreshing is enabled (someone is using it). - * - * Useful for buffer read-only modules. - */ -extern unsigned char cfag12864b_isenabled(void); - -/* * Is the module inited? */ extern unsigned char cfag12864b_isinited(void); diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ae04035b6cbe..1b20d2d8ef7c 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -172,7 +172,11 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* siblings list anchored at the parent's ->children */ + /* + * siblings list anchored at the parent's ->children + * + * linkage is protected by cgroup_mutex or RCU + */ struct list_head sibling; struct list_head children; @@ -210,6 +214,14 @@ struct cgroup_subsys_state { * fields of the containing structure. */ struct cgroup_subsys_state *parent; + + /* + * Keep track of total numbers of visible descendant CSSes. + * The total number of dying CSSes is tracked in + * css->cgroup->nr_dying_subsys[ssid]. + * Protected by cgroup_mutex. + */ + int nr_descendants; }; /* @@ -315,6 +327,7 @@ struct cgroup_base_stat { #ifdef CONFIG_SCHED_CORE u64 forceidle_sum; #endif + u64 ntime; }; /* @@ -385,7 +398,7 @@ struct cgroup_freezer_state { bool freeze; /* Should the cgroup actually be frozen? */ - int e_freeze; + bool e_freeze; /* Fields below are protected by css_set_lock */ @@ -470,6 +483,12 @@ struct cgroup { /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; + /* + * Keep track of total number of dying CSSes at and below this cgroup. + * Protected by cgroup_mutex. + */ + int nr_dying_subsys[CGROUP_SUBSYS_COUNT]; + struct cgroup_root *root; /* @@ -775,6 +794,11 @@ struct cgroup_subsys { extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; +struct cgroup_of_peak { + unsigned long value; + struct list_head list; +}; + /** * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups * @tsk: target task diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c60ba0ab1462..f8ef47f8a634 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/nodemask.h> +#include <linux/list.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> #include <linux/fs.h> @@ -28,8 +29,6 @@ struct kernel_clone_args; -#ifdef CONFIG_CGROUPS - /* * All weight knobs on the default hierarchy should use the following min, * default and max values. The default value is the logarithmic center of @@ -39,6 +38,8 @@ struct kernel_clone_args; #define CGROUP_WEIGHT_DFL 100 #define CGROUP_WEIGHT_MAX 10000 +#ifdef CONFIG_CGROUPS + enum { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ @@ -854,4 +855,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); +struct cgroup_of_peak *of_peak(struct kernfs_open_file *of); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index d9e613803df1..ec00e3f7af2b 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -4,6 +4,142 @@ #include <linux/compiler.h> +/** + * DOC: scope-based cleanup helpers + * + * The "goto error" pattern is notorious for introducing subtle resource + * leaks. It is tedious and error prone to add new resource acquisition + * constraints into code paths that already have several unwind + * conditions. The "cleanup" helpers enable the compiler to help with + * this tedium and can aid in maintaining LIFO (last in first out) + * unwind ordering to avoid unintentional leaks. + * + * As drivers make up the majority of the kernel code base, here is an + * example of using these helpers to clean up PCI drivers. The target of + * the cleanups are occasions where a goto is used to unwind a device + * reference (pci_dev_put()), or unlock the device (pci_dev_unlock()) + * before returning. + * + * The DEFINE_FREE() macro can arrange for PCI device references to be + * dropped when the associated variable goes out of scope:: + * + * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T)) + * ... + * struct pci_dev *dev __free(pci_dev_put) = + * pci_get_slot(parent, PCI_DEVFN(0, 0)); + * + * The above will automatically call pci_dev_put() if @dev is non-NULL + * when @dev goes out of scope (automatic variable scope). If a function + * wants to invoke pci_dev_put() on error, but return @dev (i.e. without + * freeing it) on success, it can do:: + * + * return no_free_ptr(dev); + * + * ...or:: + * + * return_ptr(dev); + * + * The DEFINE_GUARD() macro can arrange for the PCI device lock to be + * dropped when the scope where guard() is invoked ends:: + * + * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) + * ... + * guard(pci_dev)(dev); + * + * The lifetime of the lock obtained by the guard() helper follows the + * scope of automatic variable declaration. Take the following example:: + * + * func(...) + * { + * if (...) { + * ... + * guard(pci_dev)(dev); // pci_dev_lock() invoked here + * ... + * } // <- implied pci_dev_unlock() triggered here + * } + * + * Observe the lock is held for the remainder of the "if ()" block not + * the remainder of "func()". + * + * Now, when a function uses both __free() and guard(), or multiple + * instances of __free(), the LIFO order of variable definition order + * matters. GCC documentation says: + * + * "When multiple variables in the same scope have cleanup attributes, + * at exit from the scope their associated cleanup functions are run in + * reverse order of definition (last defined, first cleanup)." + * + * When the unwind order matters it requires that variables be defined + * mid-function scope rather than at the top of the file. Take the + * following example and notice the bug highlighted by "!!":: + * + * LIST_HEAD(list); + * DEFINE_MUTEX(lock); + * + * struct object { + * struct list_head node; + * }; + * + * static struct object *alloc_add(void) + * { + * struct object *obj; + * + * lockdep_assert_held(&lock); + * obj = kzalloc(sizeof(*obj), GFP_KERNEL); + * if (obj) { + * LIST_HEAD_INIT(&obj->node); + * list_add(obj->node, &list): + * } + * return obj; + * } + * + * static void remove_free(struct object *obj) + * { + * lockdep_assert_held(&lock); + * list_del(&obj->node); + * kfree(obj); + * } + * + * DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T)) + * static int init(void) + * { + * struct object *obj __free(remove_free) = NULL; + * int err; + * + * guard(mutex)(&lock); + * obj = alloc_add(); + * + * if (!obj) + * return -ENOMEM; + * + * err = other_init(obj); + * if (err) + * return err; // remove_free() called without the lock!! + * + * no_free_ptr(obj); + * return 0; + * } + * + * That bug is fixed by changing init() to call guard() and define + + * initialize @obj in this order:: + * + * guard(mutex)(&lock); + * struct object *obj __free(remove_free) = alloc_add(); + * + * Given that the "__free(...) = NULL" pattern for variables defined at + * the top of the function poses this potential interdependency problem + * the recommendation is to always define and assign variables in one + * statement and not group variable definitions at the top of the + * function when __free() is used. + * + * Lastly, given that the benefit of cleanup helpers is removal of + * "goto", and that the "goto" statement can jump between scopes, the + * expectation is that usage of "goto" and cleanup helpers is never + * mixed in the same function. I.e. for a given routine, convert all + * resources that need a "goto" cleanup to scope-based cleanup, or + * convert none of them. + */ + /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() @@ -98,7 +234,7 @@ const volatile void * __must_check_fn(const volatile void *val) * DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd) * * CLASS(fdget, f)(fd); - * if (!f.file) + * if (fd_empty(f)) * return -EBADF; * * // use 'f' without concern @@ -149,14 +285,20 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * similar to scoped_guard(), except it does fail when the lock * acquire fails. * + * Only for conditional locks. */ +#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ +static __maybe_unused const bool class_##_name##_is_conditional = _is_cond + #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return *_T; } + { return (void *)(__force unsigned long)*_T; } #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ class_##_name##_t _T) \ @@ -167,16 +309,40 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ CLASS(_name, __UNIQUE_ID(guard)) #define __guard_ptr(_name) class_##_name##_lock_ptr +#define __is_cond_ptr(_name) class_##_name##_is_conditional + +/* + * Helper macro for scoped_guard(). + * + * Note that the "!__is_cond_ptr(_name)" part of the condition ensures that + * compiler would be sure that for the unconditional locks the body of the + * loop (caller-provided code glued to the else clause) could not be skipped. + * It is needed because the other part - "__guard_ptr(_name)(&scope)" - is too + * hard to deduce (even if could be proven true for unconditional locks). + */ +#define __scoped_guard(_name, _label, args...) \ + for (CLASS(_name, scope)(args); \ + __guard_ptr(_name)(&scope) || !__is_cond_ptr(_name); \ + ({ goto _label; })) \ + if (0) { \ +_label: \ + break; \ + } else + +#define scoped_guard(_name, args...) \ + __scoped_guard(_name, __UNIQUE_ID(label), args) -#define scoped_guard(_name, args...) \ - for (CLASS(_name, scope)(args), \ - *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) +#define __scoped_cond_guard(_name, _fail, _label, args...) \ + for (CLASS(_name, scope)(args); true; ({ goto _label; })) \ + if (!__guard_ptr(_name)(&scope)) { \ + BUILD_BUG_ON(!__is_cond_ptr(_name)); \ + _fail; \ +_label: \ + break; \ + } else -#define scoped_cond_guard(_name, _fail, args...) \ - for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) \ - if (!__guard_ptr(_name)(&scope)) _fail; \ - else +#define scoped_cond_guard(_name, _fail, args...) \ + __scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args) /* * Additional helper macros for generating lock guards with types, either for @@ -211,7 +377,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ \ static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ { \ - return _T->lock; \ + return (void *)(__force unsigned long)_T->lock; \ } @@ -233,14 +399,17 @@ static inline class_##_name##_t class_##_name##_constructor(void) \ } #define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \ +__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) #define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ +__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) #define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ EXTEND_CLASS(_name, _ext, \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ if (_T->lock && !(_condlock)) _T->lock = NULL; \ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 4a537260f655..2e6e603b7493 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -394,6 +394,20 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, __clk_hw_register_fixed_rate((dev), NULL, (name), (parent_name), NULL, \ NULL, (flags), (fixed_rate), 0, 0, true) /** + * devm_clk_hw_register_fixed_rate_parent_data - register fixed-rate clock with + * the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_data: parent clk data + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ +#define devm_clk_hw_register_fixed_rate_parent_data(dev, name, parent_data, flags, \ + fixed_rate) \ + __clk_hw_register_fixed_rate((dev), NULL, (name), NULL, NULL, \ + (parent_data), (flags), (fixed_rate), 0, \ + 0, true) +/** * clk_hw_register_fixed_rate_parent_hw - register fixed-rate clock with * the clock framework * @dev: device that is registering this clock @@ -609,6 +623,24 @@ struct clk *clk_register_gate(struct device *dev, const char *name, NULL, (flags), (reg), (bit_idx), \ (clk_gate_flags), (lock)) /** + * devm_clk_hw_register_gate_parent_hw - register a gate clock with the clock + * framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_hw: pointer to parent clk + * @flags: framework-specific flags for this clock + * @reg: register address to control gating of this clock + * @bit_idx: which bit in the register controls gating of this clock + * @clk_gate_flags: gate-specific flags for this clock + * @lock: shared register lock for this clock + */ +#define devm_clk_hw_register_gate_parent_hw(dev, name, parent_hw, flags, \ + reg, bit_idx, clk_gate_flags, \ + lock) \ + __devm_clk_hw_register_gate((dev), NULL, (name), NULL, (parent_hw), \ + NULL, (flags), (reg), (bit_idx), \ + (clk_gate_flags), (lock)) +/** * devm_clk_hw_register_gate_parent_data - register a gate clock with the * clock framework * @dev: device that is registering this clock @@ -675,13 +707,15 @@ struct clk_div_table { * CLK_DIVIDER_BIG_ENDIAN - By default little endian register accesses are used * for the divider register. Setting this flag makes the register accesses * big endian. + * CLK_DIVIDER_EVEN_INTEGERS - clock divisor is 2, 4, 6, 8, 10, etc. + * Formula is 2 * (value read from hardware + 1). */ struct clk_divider { struct clk_hw hw; void __iomem *reg; u8 shift; u8 width; - u8 flags; + u16 flags; const struct clk_div_table *table; spinlock_t *lock; }; @@ -697,6 +731,7 @@ struct clk_divider { #define CLK_DIVIDER_READ_ONLY BIT(5) #define CLK_DIVIDER_MAX_AT_ZERO BIT(6) #define CLK_DIVIDER_BIG_ENDIAN BIT(7) +#define CLK_DIVIDER_EVEN_INTEGERS BIT(8) extern const struct clk_ops clk_divider_ops; extern const struct clk_ops clk_divider_ro_ops; @@ -726,19 +761,21 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); struct clk_hw *__devm_clk_hw_register_divider(struct device *dev, struct device_node *np, const char *name, const char *parent_name, const struct clk_hw *parent_hw, const struct clk_parent_data *parent_data, unsigned long flags, - void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, + void __iomem *reg, u8 shift, u8 width, + unsigned long clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); struct clk *clk_register_divider_table(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, - u8 clk_divider_flags, const struct clk_div_table *table, - spinlock_t *lock); + unsigned long clk_divider_flags, + const struct clk_div_table *table, spinlock_t *lock); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock @@ -1123,6 +1160,9 @@ struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device * struct device_node *np, const char *name, const char *fw_name, unsigned long flags, unsigned int mult, unsigned int div, unsigned long acc); +struct clk_hw *clk_hw_register_fixed_factor_index(struct device *dev, + const char *name, unsigned int index, unsigned long flags, + unsigned int mult, unsigned int div); void clk_hw_unregister_fixed_factor(struct clk_hw *hw); struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, @@ -1346,7 +1386,6 @@ unsigned long clk_hw_get_flags(const struct clk_hw *hw); (clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT) bool clk_hw_is_prepared(const struct clk_hw *hw); -bool clk_hw_rate_is_protected(const struct clk_hw *hw); bool clk_hw_is_enabled(const struct clk_hw *hw); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); diff --git a/include/linux/clk.h b/include/linux/clk.h index 0fa56d672532..1dcee6d701e4 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -496,11 +496,13 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks); /** - * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed) + * devm_clk_bulk_get_all_enabled - Get and enable all clocks of the consumer (managed) * @dev: device for clock "consumer" * @clks: pointer to the clk_bulk_data table of consumer * - * Returns success (0) or negative errno. + * Returns a positive value for the number of clocks obtained while the + * clock references are stored in the clk_bulk_data table in @clks field. + * Returns 0 if there're none and a negative value if something failed. * * This helper function allows drivers to get all clocks of the * consumer and enables them in one operation with management. @@ -508,8 +510,8 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, * is unbound. */ -int __must_check devm_clk_bulk_get_all_enable(struct device *dev, - struct clk_bulk_data **clks); +int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, + struct clk_bulk_data **clks); /** * devm_clk_get - lookup and obtain a managed reference to a clock producer. @@ -641,6 +643,32 @@ struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); /** + * devm_clk_get_optional_enabled_with_rate - devm_clk_get_optional() + + * clk_set_rate() + + * clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * @rate: new clock rate + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_enabled(). + * + * The returned clk (if valid) is prepared and enabled and rate was set. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_enabled_with_rate(struct device *dev, + const char *id, + unsigned long rate); + +/** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. * @dev: device for clock "consumer" @@ -982,6 +1010,13 @@ static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, return NULL; } +static inline struct clk * +devm_clk_get_optional_enabled_with_rate(struct device *dev, const char *id, + unsigned long rate) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { @@ -1001,7 +1036,7 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev, return 0; } -static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev, +static inline int __must_check devm_clk_bulk_get_all_enabled(struct device *dev, struct clk_bulk_data **clks) { return 0; @@ -1103,6 +1138,15 @@ static inline void clk_restore_context(void) {} #endif +/* Deprecated. Use devm_clk_bulk_get_all_enabled() */ +static inline int __must_check +devm_clk_bulk_get_all_enable(struct device *dev, struct clk_bulk_data **clks) +{ + int ret = devm_clk_bulk_get_all_enabled(dev, clks); + + return ret > 0 ? 0 : ret; +} + /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ static inline int clk_prepare_enable(struct clk *clk) { diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index d35b677b08fe..65b7c41471c3 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -49,6 +49,7 @@ struct module; * @archdata: Optional arch-specific data * @max_cycles: Maximum safe cycle value which won't overflow on * multiplication + * @max_raw_delta: Maximum safe delta value for negative motion detection * @name: Pointer to clocksource name * @list: List head for registration (internal) * @freq_khz: Clocksource frequency in khz. @@ -109,6 +110,7 @@ struct clocksource { struct arch_clocksource_data archdata; #endif u64 max_cycles; + u64 max_raw_delta; const char *name; struct list_head list; u32 freq_khz; @@ -215,7 +217,6 @@ static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift) extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); -extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_suspend(void); extern void clocksource_resume(void); extern struct clocksource * __init clocksource_default_clock(void); diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index 2bb4d8c2f1b0..c4ef4ae2eded 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -6,6 +6,7 @@ enum clocksource_ids { CSID_GENERIC = 0, CSID_ARM_ARCH_COUNTER, + CSID_S390_TOD, CSID_X86_TSC_EARLY, CSID_X86_TSC, CSID_X86_KVM_CLK, diff --git a/include/linux/closure.h b/include/linux/closure.h index 2af44427107d..880fe85e35e9 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -454,4 +454,39 @@ do { \ __closure_wait_event(waitlist, _cond); \ } while (0) +#define __closure_wait_event_timeout(waitlist, _cond, _until) \ +({ \ + struct closure cl; \ + long _t; \ + \ + closure_init_stack(&cl); \ + \ + while (1) { \ + closure_wait(waitlist, &cl); \ + if (_cond) { \ + _t = max_t(long, 1L, _until - jiffies); \ + break; \ + } \ + _t = max_t(long, 0L, _until - jiffies); \ + if (!_t) \ + break; \ + closure_sync_timeout(&cl, _t); \ + } \ + closure_wake_up(waitlist); \ + closure_sync(&cl); \ + _t; \ +}) + +/* + * Returns 0 if timeout expired, remaining time in jiffies (at least 1) if + * condition became true + */ +#define closure_wait_event_timeout(waitlist, _cond, _timeout) \ +({ \ + unsigned long _until = jiffies + _timeout; \ + (_cond) \ + ? max_t(long, 1L, _until - jiffies) \ + : __closure_wait_event_timeout(waitlist, _cond, _until);\ +}) + #endif /* _LINUX_CLOSURE_H */ diff --git a/include/linux/cma.h b/include/linux/cma.h index 9db877506ea8..d15b64f51336 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -52,4 +52,20 @@ extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); extern void cma_reserve_pages_on_error(struct cma *cma); + +#ifdef CONFIG_CMA +struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); +bool cma_free_folio(struct cma *cma, const struct folio *folio); +#else +static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) +{ + return NULL; +} + +static inline bool cma_free_folio(struct cma *cma, const struct folio *folio) +{ + return false; +} +#endif + #endif diff --git a/include/linux/codetag.h b/include/linux/codetag.h index c2a579ccd455..d14dbd26b370 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -13,6 +13,9 @@ struct codetag_module; struct seq_buf; struct module; +#define CODETAG_SECTION_START_PREFIX "__start_" +#define CODETAG_SECTION_STOP_PREFIX "__stop_" + /* * An instance of this structure is created in a special ELF section at every * code location being tagged. At runtime, the special section is treated as @@ -35,8 +38,15 @@ struct codetag_type_desc { size_t tag_size; void (*module_load)(struct codetag_type *cttype, struct codetag_module *cmod); - bool (*module_unload)(struct codetag_type *cttype, + void (*module_unload)(struct codetag_type *cttype, struct codetag_module *cmod); +#ifdef CONFIG_MODULES + void (*module_replaced)(struct module *mod, struct module *new_mod); + bool (*needs_section_mem)(struct module *mod, unsigned long size); + void *(*alloc_section_mem)(struct module *mod, unsigned long size, + unsigned int prepend, unsigned long align); + void (*free_section_mem)(struct module *mod, bool used); +#endif }; struct codetag_iterator { @@ -71,11 +81,31 @@ struct codetag_type * codetag_register_type(const struct codetag_type_desc *desc); #if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) + +bool codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size); +void *codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align); +void codetag_free_module_sections(struct module *mod); +void codetag_module_replaced(struct module *mod, struct module *new_mod); void codetag_load_module(struct module *mod); -bool codetag_unload_module(struct module *mod); -#else +void codetag_unload_module(struct module *mod); + +#else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ + +static inline bool +codetag_needs_module_section(struct module *mod, const char *name, + unsigned long size) { return false; } +static inline void * +codetag_alloc_module_section(struct module *mod, const char *name, + unsigned long size, unsigned int prepend, + unsigned long align) { return NULL; } +static inline void codetag_free_module_sections(struct module *mod) {} +static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} static inline void codetag_load_module(struct module *mod) {} -static inline bool codetag_unload_module(struct module *mod) { return true; } -#endif +static inline void codetag_unload_module(struct module *mod) {} + +#endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ #endif /* _LINUX_CODETAG_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 4c1a39dcb624..2e7c2c282f3a 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead." +#error "Please do not include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead." #endif /* Compiler specific definitions for Clang compiler */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f805adaa316e..d0ed9583743f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." +#error "Please do not include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." #endif /* @@ -80,7 +80,11 @@ #define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif +#ifdef __SANITIZE_HWADDRESS__ +#define __no_sanitize_address __attribute__((__no_sanitize__("hwaddress"))) +#else #define __no_sanitize_address __attribute__((__no_sanitize_address__)) +#endif #if defined(__SANITIZE_THREAD__) #define __no_sanitize_thread __attribute__((__no_sanitize_thread__)) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2df665fa2964..469a64dd6495 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -133,7 +133,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define annotate_unreachable() __annotate_unreachable(__COUNTER__) /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table") +#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") #else /* !CONFIG_OBJTOOL */ #define annotate_reachable() @@ -239,8 +239,18 @@ static inline void *offset_to_ptr(const int *off) #endif /* __ASSEMBLY__ */ +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#else /* __CHECKER__ */ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif /* __CHECKER__ */ + /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") + +/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ +#define __must_be_cstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") /* * This returns a constant expression while determining if an argument is diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 32284cd26d52..c16d4199bf92 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -95,19 +95,6 @@ #endif /* - * Optional: only supported since gcc >= 15 - * Optional: only supported since clang >= 18 - * - * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://github.com/llvm/llvm-project/pull/76348 - */ -#if __has_attribute(__counted_by__) -# define __counted_by(member) __attribute__((__counted_by__(member))) -#else -# define __counted_by(member) -#endif - -/* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 * diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index f14c275950b5..981cc3d7e3aa 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -266,6 +266,12 @@ struct ftrace_likely_data { #define noinline_for_stack noinline /* + * Use noinline_for_tracing for functions that should not be inlined. + * For tracing reasons. + */ +#define noinline_for_tracing noinline + +/* * Sanitizer helper attributes: Because using __always_inline and * __no_sanitize_* conflict, provide helper attributes that will either expand * to __no_sanitize_* in compilation units where instrumentation is enabled @@ -324,6 +330,25 @@ struct ftrace_likely_data { #endif /* + * Optional: only supported since gcc >= 15 + * Optional: only supported since clang >= 18 + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 + * clang: https://github.com/llvm/llvm-project/pull/76348 + * + * __bdos on clang < 19.1.2 can erroneously return 0: + * https://github.com/llvm/llvm-project/pull/110497 + * + * __bdos on clang < 19.1.3 can be off by 4: + * https://github.com/llvm/llvm-project/pull/112636 + */ +#ifdef CONFIG_CC_HAS_COUNTED_BY +# define __counted_by(member) __attribute__((__counted_by__(member))) +#else +# define __counted_by(member) +#endif + +/* * Apply __counted_by() when the Endianness matches to increase test coverage. */ #ifdef __LITTLE_ENDIAN @@ -421,6 +446,13 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif +/* Determine if an attribute has been applied to a variable. */ +#if __has_builtin(__builtin_has_attribute) +#define __annotated(var, attr) __builtin_has_attribute(var, attr) +#else +#define __annotated(var, attr) (false) +#endif + /* * Some versions of gcc do not mark 'asm goto' volatile: * diff --git a/include/linux/console.h b/include/linux/console.h index 31a8f5b85f5d..eba367bf605d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -16,7 +16,9 @@ #include <linux/atomic.h> #include <linux/bits.h> +#include <linux/irq_work.h> #include <linux/rculist.h> +#include <linux/rcuwait.h> #include <linux/types.h> #include <linux/vesa.h> @@ -303,7 +305,7 @@ struct nbcon_write_context { /** * struct console - The console descriptor structure * @name: The name of the console driver - * @write: Write callback to output messages (Optional) + * @write: Legacy write callback to output messages (Optional) * @read: Read callback for console input (Optional) * @device: The underlying TTY device driver (Optional) * @unblank: Callback to unblank the console (Optional) @@ -320,10 +322,14 @@ struct nbcon_write_context { * @data: Driver private data * @node: hlist node for the console list * - * @write_atomic: Write callback for atomic context * @nbcon_state: State for nbcon consoles * @nbcon_seq: Sequence number of the next record for nbcon to print + * @nbcon_device_ctxt: Context available for non-printing operations + * @nbcon_prev_seq: Seq num the previous nbcon owner was assigned to print * @pbufs: Pointer to nbcon private buffer + * @kthread: Printer kthread for this console + * @rcuwait: RCU-safe wait object for @kthread waking + * @irq_work: Defer @kthread waking to IRQ work context */ struct console { char name[16]; @@ -345,11 +351,121 @@ struct console { struct hlist_node node; /* nbcon console specific members */ - bool (*write_atomic)(struct console *con, - struct nbcon_write_context *wctxt); + + /** + * @write_atomic: + * + * NBCON callback to write out text in any context. (Optional) + * + * This callback is called with the console already acquired. However, + * a higher priority context is allowed to take it over by default. + * + * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe() + * around any code where the takeover is not safe, for example, when + * manipulating the serial port registers. + * + * nbcon_enter_unsafe() will fail if the context has lost the console + * ownership in the meantime. In this case, the callback is no longer + * allowed to go forward. It must back out immediately and carefully. + * The buffer content is also no longer trusted since it no longer + * belongs to the context. + * + * The callback should allow the takeover whenever it is safe. It + * increases the chance to see messages when the system is in trouble. + * If the driver must reacquire ownership in order to finalize or + * revert hardware changes, nbcon_reacquire_nobuf() can be used. + * However, on reacquire the buffer content is no longer available. A + * reacquire cannot be used to resume printing. + * + * The callback can be called from any context (including NMI). + * Therefore it must avoid usage of any locking and instead rely + * on the console ownership for synchronization. + */ + void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt); + + /** + * @write_thread: + * + * NBCON callback to write out text in task context. + * + * This callback must be called only in task context with both + * device_lock() and the nbcon console acquired with + * NBCON_PRIO_NORMAL. + * + * The same rules for console ownership verification and unsafe + * sections handling applies as with write_atomic(). + * + * The console ownership handling is necessary for synchronization + * against write_atomic() which is synchronized only via the context. + * + * The device_lock() provides the primary serialization for operations + * on the device. It might be as relaxed (mutex)[*] or as tight + * (disabled preemption and interrupts) as needed. It allows + * the kthread to operate in the least restrictive mode[**]. + * + * [*] Standalone nbcon_context_try_acquire() is not safe with + * the preemption enabled, see nbcon_owner_matches(). But it + * can be safe when always called in the preemptive context + * under the device_lock(). + * + * [**] The device_lock() makes sure that nbcon_context_try_acquire() + * would never need to spin which is important especially with + * PREEMPT_RT. + */ + void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt); + + /** + * @device_lock: + * + * NBCON callback to begin synchronization with driver code. + * + * Console drivers typically must deal with access to the hardware + * via user input/output (such as an interactive login shell) and + * output of kernel messages via printk() calls. This callback is + * called by the printk-subsystem whenever it needs to synchronize + * with hardware access by the driver. It should be implemented to + * use whatever synchronization mechanism the driver is using for + * itself (for example, the port lock for uart serial consoles). + * + * The callback is always called from task context. It may use any + * synchronization method required by the driver. + * + * IMPORTANT: The callback MUST disable migration. The console driver + * may be using a synchronization mechanism that already takes + * care of this (such as spinlocks). Otherwise this function must + * explicitly call migrate_disable(). + * + * The flags argument is provided as a convenience to the driver. It + * will be passed again to device_unlock(). It can be ignored if the + * driver does not need it. + */ + void (*device_lock)(struct console *con, unsigned long *flags); + + /** + * @device_unlock: + * + * NBCON callback to finish synchronization with driver code. + * + * It is the counterpart to device_lock(). + * + * This callback is always called from task context. It must + * appropriately re-enable migration (depending on how device_lock() + * disabled migration). + * + * The flags argument is the value of the same variable that was + * passed to device_lock(). + */ + void (*device_unlock)(struct console *con, unsigned long flags); + atomic_t __private nbcon_state; atomic_long_t __private nbcon_seq; + struct nbcon_context __private nbcon_device_ctxt; + atomic_long_t __private nbcon_prev_seq; + struct printk_buffers *pbufs; + struct task_struct *kthread; + struct rcuwait rcuwait; + struct irq_work irq_work; }; #ifdef CONFIG_LOCKDEP @@ -378,28 +494,34 @@ extern void console_list_unlock(void) __releases(console_mutex); extern struct hlist_head console_list; /** - * console_srcu_read_flags - Locklessly read the console flags + * console_srcu_read_flags - Locklessly read flags of a possibly registered + * console * @con: struct console pointer of console to read flags from * - * This function provides the necessary READ_ONCE() and data_race() - * notation for locklessly reading the console flags. The READ_ONCE() - * in this function matches the WRITE_ONCE() when @flags are modified - * for registered consoles with console_srcu_write_flags(). + * Locklessly reading @con->flags provides a consistent read value because + * there is at most one CPU modifying @con->flags and that CPU is using only + * read-modify-write operations to do so. + * + * Requires console_srcu_read_lock to be held, which implies that @con might + * be a registered console. The purpose of holding console_srcu_read_lock is + * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED) + * and that no exit/cleanup routines will run if the console is currently + * undergoing unregistration. * - * Only use this function to read console flags when locklessly - * iterating the console list via srcu. + * If the caller is holding the console_list_lock or it is _certain_ that + * @con is not and will not become registered, the caller may read + * @con->flags directly instead. * * Context: Any context. + * Return: The current value of the @con->flags field. */ static inline short console_srcu_read_flags(const struct console *con) { WARN_ON_ONCE(!console_srcu_read_lock_is_held()); /* - * Locklessly reading console->flags provides a consistent - * read value because there is at most one CPU modifying - * console->flags and that CPU is using only read-modify-write - * operations to do so. + * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). */ return data_race(READ_ONCE(con->flags)); } @@ -477,13 +599,19 @@ static inline bool console_is_registered(const struct console *con) hlist_for_each_entry(con, &console_list, node) #ifdef CONFIG_PRINTK +extern void nbcon_cpu_emergency_enter(void); +extern void nbcon_cpu_emergency_exit(void); extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt); extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt); extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt); +extern void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt); #else +static inline void nbcon_cpu_emergency_enter(void) { } +static inline void nbcon_cpu_emergency_exit(void) { } static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; } static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; } +static inline void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt) { } #endif extern int console_set_on_cmdline; diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6e76b9dba00e..af9fe87a0922 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -26,26 +26,26 @@ extern void user_exit_callable(void); static inline void user_enter(void) { if (context_tracking_enabled()) - ct_user_enter(CONTEXT_USER); + ct_user_enter(CT_STATE_USER); } static inline void user_exit(void) { if (context_tracking_enabled()) - ct_user_exit(CONTEXT_USER); + ct_user_exit(CT_STATE_USER); } /* Called with interrupts disabled. */ static __always_inline void user_enter_irqoff(void) { if (context_tracking_enabled()) - __ct_user_enter(CONTEXT_USER); + __ct_user_enter(CT_STATE_USER); } static __always_inline void user_exit_irqoff(void) { if (context_tracking_enabled()) - __ct_user_exit(CONTEXT_USER); + __ct_user_exit(CT_STATE_USER); } static inline enum ctx_state exception_enter(void) @@ -57,7 +57,7 @@ static inline enum ctx_state exception_enter(void) return 0; prev_ctx = __ct_state(); - if (prev_ctx != CONTEXT_KERNEL) + if (prev_ctx != CT_STATE_KERNEL) ct_user_exit(prev_ctx); return prev_ctx; @@ -67,7 +67,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK) && context_tracking_enabled()) { - if (prev_ctx != CONTEXT_KERNEL) + if (prev_ctx != CT_STATE_KERNEL) ct_user_enter(prev_ctx); } } @@ -75,15 +75,17 @@ static inline void exception_exit(enum ctx_state prev_ctx) static __always_inline bool context_tracking_guest_enter(void) { if (context_tracking_enabled()) - __ct_user_enter(CONTEXT_GUEST); + __ct_user_enter(CT_STATE_GUEST); return context_tracking_enabled_this_cpu(); } -static __always_inline void context_tracking_guest_exit(void) +static __always_inline bool context_tracking_guest_exit(void) { if (context_tracking_enabled()) - __ct_user_exit(CONTEXT_GUEST); + __ct_user_exit(CT_STATE_GUEST); + + return context_tracking_enabled_this_cpu(); } #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) @@ -98,7 +100,7 @@ static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } -static __always_inline void context_tracking_guest_exit(void) { } +static __always_inline bool context_tracking_guest_exit(void) { return false; } #define CT_WARN_ON(cond) do { } while (0) #endif /* !CONFIG_CONTEXT_TRACKING_USER */ @@ -113,13 +115,17 @@ extern void ct_idle_enter(void); extern void ct_idle_exit(void); /* - * Is the current CPU in an extended quiescent state? + * Is RCU watching the current CPU (IOW, it is not in an extended quiescent state)? + * + * Note that this returns the actual boolean data (watching / not watching), + * whereas ct_rcu_watching() returns the RCU_WATCHING subvariable of + * context_tracking.state. * * No ordering, as we are sampling CPU-local information. */ -static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void) +static __always_inline bool rcu_is_watching_curr_cpu(void) { - return !(raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX); + return raw_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING; } /* @@ -140,9 +146,9 @@ static __always_inline bool warn_rcu_enter(void) * lots of the actual reporting also relies on RCU. */ preempt_disable_notrace(); - if (rcu_dynticks_curr_cpu_in_eqs()) { + if (!rcu_is_watching_curr_cpu()) { ret = true; - ct_state_inc(RCU_DYNTICKS_IDX); + ct_state_inc(CT_RCU_WATCHING); } return ret; @@ -151,7 +157,7 @@ static __always_inline bool warn_rcu_enter(void) static __always_inline void warn_rcu_exit(bool rcu) { if (rcu) - ct_state_inc(RCU_DYNTICKS_IDX); + ct_state_inc(CT_RCU_WATCHING); preempt_enable_notrace(); } diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index bbff5f7f8803..7b8433d5a8ef 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -7,22 +7,22 @@ #include <linux/context_tracking_irq.h> /* Offset to allow distinguishing irq vs. task-based idle entry/exit. */ -#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1) +#define CT_NESTING_IRQ_NONIDLE ((LONG_MAX / 2) + 1) enum ctx_state { - CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ - CONTEXT_KERNEL = 0, - CONTEXT_IDLE = 1, - CONTEXT_USER = 2, - CONTEXT_GUEST = 3, - CONTEXT_MAX = 4, + CT_STATE_DISABLED = -1, /* returned by ct_state() if unknown */ + CT_STATE_KERNEL = 0, + CT_STATE_IDLE = 1, + CT_STATE_USER = 2, + CT_STATE_GUEST = 3, + CT_STATE_MAX = 4, }; -/* Even value for idle, else odd. */ -#define RCU_DYNTICKS_IDX CONTEXT_MAX +/* Odd value for watching, else even. */ +#define CT_RCU_WATCHING CT_STATE_MAX -#define CT_STATE_MASK (CONTEXT_MAX - 1) -#define CT_DYNTICKS_MASK (~CT_STATE_MASK) +#define CT_STATE_MASK (CT_STATE_MAX - 1) +#define CT_RCU_WATCHING_MASK (~CT_STATE_MASK) struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING_USER @@ -39,8 +39,8 @@ struct context_tracking { atomic_t state; #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE - long dynticks_nesting; /* Track process nesting level. */ - long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */ + long nesting; /* Track process nesting level. */ + long nmi_nesting; /* Track irq/NMI nesting level. */ #endif }; @@ -56,47 +56,47 @@ static __always_inline int __ct_state(void) #endif #ifdef CONFIG_CONTEXT_TRACKING_IDLE -static __always_inline int ct_dynticks(void) +static __always_inline int ct_rcu_watching(void) { - return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK; + return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_RCU_WATCHING_MASK; } -static __always_inline int ct_dynticks_cpu(int cpu) +static __always_inline int ct_rcu_watching_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read(&ct->state) & CT_DYNTICKS_MASK; + return atomic_read(&ct->state) & CT_RCU_WATCHING_MASK; } -static __always_inline int ct_dynticks_cpu_acquire(int cpu) +static __always_inline int ct_rcu_watching_cpu_acquire(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK; + return atomic_read_acquire(&ct->state) & CT_RCU_WATCHING_MASK; } -static __always_inline long ct_dynticks_nesting(void) +static __always_inline long ct_nesting(void) { - return __this_cpu_read(context_tracking.dynticks_nesting); + return __this_cpu_read(context_tracking.nesting); } -static __always_inline long ct_dynticks_nesting_cpu(int cpu) +static __always_inline long ct_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return ct->dynticks_nesting; + return ct->nesting; } -static __always_inline long ct_dynticks_nmi_nesting(void) +static __always_inline long ct_nmi_nesting(void) { - return __this_cpu_read(context_tracking.dynticks_nmi_nesting); + return __this_cpu_read(context_tracking.nmi_nesting); } -static __always_inline long ct_dynticks_nmi_nesting_cpu(int cpu) +static __always_inline long ct_nmi_nesting_cpu(int cpu) { struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu); - return ct->dynticks_nmi_nesting; + return ct->nmi_nesting; } #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */ @@ -113,7 +113,7 @@ static __always_inline bool context_tracking_enabled_cpu(int cpu) return context_tracking_enabled() && per_cpu(context_tracking.active, cpu); } -static inline bool context_tracking_enabled_this_cpu(void) +static __always_inline bool context_tracking_enabled_this_cpu(void) { return context_tracking_enabled() && __this_cpu_read(context_tracking.active); } @@ -123,14 +123,14 @@ static inline bool context_tracking_enabled_this_cpu(void) * * Returns the current cpu's context tracking state if context tracking * is enabled. If context tracking is disabled, returns - * CONTEXT_DISABLED. This should be used primarily for debugging. + * CT_STATE_DISABLED. This should be used primarily for debugging. */ static __always_inline int ct_state(void) { int ret; if (!context_tracking_enabled()) - return CONTEXT_DISABLED; + return CT_STATE_DISABLED; preempt_disable(); ret = __ct_state(); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 0904ba010341..45e598fe3476 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -43,8 +43,30 @@ extern int dump_align(struct coredump_params *cprm, int align); int dump_user_range(struct coredump_params *cprm, unsigned long start, unsigned long len); extern void do_coredump(const kernel_siginfo_t *siginfo); + +/* + * Logging for the coredump code, ratelimited. + * The TGID and comm fields are added to the message. + */ + +#define __COREDUMP_PRINTK(Level, Format, ...) \ + do { \ + char comm[TASK_COMM_LEN]; \ + \ + get_task_comm(comm, current); \ + printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \ + task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \ + } while (0) \ + +#define coredump_report(fmt, ...) __COREDUMP_PRINTK(KERN_INFO, fmt, ##__VA_ARGS__) +#define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__) + #else static inline void do_coredump(const kernel_siginfo_t *siginfo) {} + +#define coredump_report(...) +#define coredump_report_failure(...) + #endif #if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL) diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 51ac441a37c3..89b0ac0014b0 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -49,12 +49,21 @@ * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. - * [59:08] - Unused (SBZ) - * [63:60] - Version + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/ + * Added in minor version 1. + * [55:40] - Unused (SBZ) + * [59:56] - Minor Version - previously existing fields are compatible with + * all minor versions. + * [63:60] - Major Version - previously existing fields mean different things + * in new major versions. */ #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8) -#define CS_AUX_HW_ID_CURR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56) +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_MAJOR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION 1 #endif diff --git a/include/linux/coresight.h b/include/linux/coresight.h index f09ace92176e..c13342594278 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -218,6 +218,24 @@ struct coresight_sysfs_link { const char *target_name; }; +/* architecturally we have 128 IDs some of which are reserved */ +#define CORESIGHT_TRACE_IDS_MAX 128 + +/** + * Trace ID map. + * + * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs. + * Initialised so that the reserved IDs are permanently marked as + * in use. + * @perf_cs_etm_session_active: Number of Perf sessions using this ID map. + */ +struct coresight_trace_id_map { + DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); + atomic_t __percpu *cpu_map; + atomic_t perf_cs_etm_session_active; + spinlock_t lock; +}; + /** * struct coresight_device - representation of a device as used by the framework * @pdata: Platform data with device connections associated to this device. @@ -271,6 +289,7 @@ struct coresight_device { bool sysfs_sink_activated; struct dev_ext_attribute *ea; struct coresight_device *def_sink; + struct coresight_trace_id_map perf_sink_id_map; /* sysfs links between components */ int nr_links; bool has_conns_grp; @@ -365,7 +384,7 @@ struct coresight_ops_link { struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode); + enum cs_mode mode, struct coresight_trace_id_map *id_map); void (*disable)(struct coresight_device *csdev, struct perf_event *event); }; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d4d2f4d1d7cb..7fe0981a7e46 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -577,12 +577,6 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define CPUFREQ_POLICY_POWERSAVE (1) #define CPUFREQ_POLICY_PERFORMANCE (2) -/* - * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. - */ -#define LATENCY_MULTIPLIER (1000) - struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; int (*init)(struct cpufreq_policy *policy); @@ -1113,10 +1107,9 @@ static inline int parse_perf_domain(int cpu, const char *list_name, const char *cell_name, struct of_phandle_args *args) { - struct device_node *cpu_np; int ret; - cpu_np = of_cpu_device_node_get(cpu); + struct device_node *cpu_np __free(device_node) = of_cpu_device_node_get(cpu); if (!cpu_np) return -ENODEV; @@ -1124,9 +1117,6 @@ static inline int parse_perf_domain(int cpu, const char *list_name, args); if (ret < 0) return ret; - - of_node_put(cpu_np); - return 0; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9316c39260e0..a04b73c40173 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -144,15 +144,16 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_IRQ_MIPS_GIC_STARTING, - CPUHP_AP_IRQ_LOONGARCH_STARTING, + CPUHP_AP_IRQ_EIOINTC_STARTING, + CPUHP_AP_IRQ_AVECINTC_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, + CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, - CPUHP_AP_PERF_X86_CSTATE_STARTING, CPUHP_AP_PERF_XTENSA_STARTING, CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, @@ -208,8 +209,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE, CPUHP_AP_PERF_X86_AMD_POWER_ONLINE, - CPUHP_AP_PERF_X86_RAPL_ONLINE, - CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, @@ -228,6 +227,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE, CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE, CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE, + CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE, CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 3183aeb7f5b4..a9ee4fe55dcf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -61,7 +61,7 @@ struct cpuidle_state { struct cpuidle_driver *drv, int index); - int (*enter_dead) (struct cpuidle_device *dev, int index); + void (*enter_dead) (struct cpuidle_device *dev, int index); /* * CPUs execute ->enter_s2idle with the local tick or entire timekeeping diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 53158de44b83..9278a50d514f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -30,7 +30,7 @@ extern unsigned int nr_cpu_ids; #endif -static inline void set_nr_cpu_ids(unsigned int nr) +static __always_inline void set_nr_cpu_ids(unsigned int nr) { #if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) WARN_ON(nr != nr_cpu_ids); @@ -149,7 +149,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) * * Return: >= nr_cpu_ids if no cpus set. */ -static inline unsigned int cpumask_first(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first(const struct cpumask *srcp) { return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -160,7 +160,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if all cpus are set. */ -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -172,7 +172,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). */ -static inline +static __always_inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); @@ -186,7 +186,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask * * Return: >= nr_cpu_ids if no cpus set in all. */ -static inline +static __always_inline unsigned int cpumask_first_and_and(const struct cpumask *srcp1, const struct cpumask *srcp2, const struct cpumask *srcp3) @@ -201,7 +201,7 @@ unsigned int cpumask_first_and_and(const struct cpumask *srcp1, * * Return: >= nr_cpumask_bits if no CPUs set. */ -static inline unsigned int cpumask_last(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_last(const struct cpumask *srcp) { return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } @@ -213,7 +213,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus set. */ -static inline +static __always_inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ @@ -229,7 +229,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) * * Return: >= nr_cpu_ids if no further cpus unset. */ -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) { /* -1 is a legal arg here. */ if (n != -1) @@ -239,18 +240,21 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) #if NR_CPUS == 1 /* Uniprocessor: there is only one valid CPU */ -static inline unsigned int cpumask_local_spread(unsigned int i, int node) +static __always_inline +unsigned int cpumask_local_spread(unsigned int i, int node) { return 0; } -static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, + const struct cpumask *src2p) { return cpumask_first_and(src1p, src2p); } -static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_any_distribute(const struct cpumask *srcp) { return cpumask_first(srcp); } @@ -269,9 +273,9 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp); * * Return: >= nr_cpu_ids if no further cpus set in both. */ -static inline +static __always_inline unsigned int cpumask_next_and(int n, const struct cpumask *src1p, - const struct cpumask *src2p) + const struct cpumask *src2p) { /* -1 is a legal arg here. */ if (n != -1) @@ -291,7 +295,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 -static inline +static __always_inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { cpumask_check(start); @@ -394,7 +398,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * Often used to find any cpu but smp_processor_id() in a mask. * Return: >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) { unsigned int i; @@ -414,7 +418,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) * * Returns >= nr_cpu_ids if no cpus set. */ -static inline +static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, unsigned int cpu) @@ -436,7 +440,8 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) +static __always_inline +unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } @@ -449,7 +454,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline +static __always_inline unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -465,7 +470,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, * * Return: >= nr_cpu_ids if such cpu doesn't exist. */ -static inline +static __always_inline unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { @@ -508,12 +513,14 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp * @cpu: cpu number (< nr_cpu_ids) * @dstp: the cpumask pointer */ -static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } -static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +static __always_inline +void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) { __set_bit(cpumask_check(cpu), cpumask_bits(dstp)); } @@ -557,7 +564,8 @@ static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, * * Return: true if @cpu is set in @cpumask, else returns false */ -static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) +static __always_inline +bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask) { return test_bit(cpumask_check(cpu), cpumask_bits((cpumask))); } @@ -571,7 +579,8 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) { return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -585,7 +594,8 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp * * Return: true if @cpu is set in old bitmap of @cpumask, else returns false */ -static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) +static __always_inline +bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask) { return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask)); } @@ -594,7 +604,7 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_setall(struct cpumask *dstp) +static __always_inline void cpumask_setall(struct cpumask *dstp) { if (small_const_nbits(small_cpumask_bits)) { cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); @@ -607,7 +617,7 @@ static inline void cpumask_setall(struct cpumask *dstp) * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer */ -static inline void cpumask_clear(struct cpumask *dstp) +static __always_inline void cpumask_clear(struct cpumask *dstp) { bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } @@ -620,9 +630,9 @@ static inline void cpumask_clear(struct cpumask *dstp) * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_and(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -634,8 +644,9 @@ static inline bool cpumask_and(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -647,9 +658,9 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_xor(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -663,9 +674,9 @@ static inline void cpumask_xor(struct cpumask *dstp, * * Return: false if *@dstp is empty, else returns true */ -static inline bool cpumask_andnot(struct cpumask *dstp, - const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -678,8 +689,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp, * * Return: true if the cpumasks are equal, false if not */ -static inline bool cpumask_equal(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -694,9 +705,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p, * Return: true if first cpumask ORed with second cpumask == third cpumask, * otherwise false */ -static inline bool cpumask_or_equal(const struct cpumask *src1p, - const struct cpumask *src2p, - const struct cpumask *src3p) +static __always_inline +bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p, + const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), cpumask_bits(src3p), small_cpumask_bits); @@ -710,8 +721,8 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, * Return: true if first cpumask ANDed with second cpumask is non-empty, * otherwise false */ -static inline bool cpumask_intersects(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -724,8 +735,8 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, * * Return: true if *@src1p is a subset of *@src2p, else returns false */ -static inline bool cpumask_subset(const struct cpumask *src1p, - const struct cpumask *src2p) +static __always_inline +bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), small_cpumask_bits); @@ -737,7 +748,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, * * Return: true if srcp is empty (has no bits set), else false */ -static inline bool cpumask_empty(const struct cpumask *srcp) +static __always_inline bool cpumask_empty(const struct cpumask *srcp) { return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } @@ -748,7 +759,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp) * * Return: true if srcp is full (has all bits set), else false */ -static inline bool cpumask_full(const struct cpumask *srcp) +static __always_inline bool cpumask_full(const struct cpumask *srcp) { return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); } @@ -759,7 +770,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) * * Return: count of bits set in *srcp */ -static inline unsigned int cpumask_weight(const struct cpumask *srcp) +static __always_inline unsigned int cpumask_weight(const struct cpumask *srcp) { return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } @@ -771,8 +782,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -784,8 +795,9 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, * * Return: count of bits set in both *srcp1 and *srcp2 */ -static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, - const struct cpumask *srcp2) +static __always_inline +unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) { return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } @@ -796,8 +808,8 @@ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_right(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, small_cpumask_bits); @@ -809,8 +821,8 @@ static inline void cpumask_shift_right(struct cpumask *dstp, * @srcp: the input to shift * @n: the number of bits to shift by */ -static inline void cpumask_shift_left(struct cpumask *dstp, - const struct cpumask *srcp, int n) +static __always_inline +void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, nr_cpumask_bits); @@ -821,8 +833,8 @@ static inline void cpumask_shift_left(struct cpumask *dstp, * @dstp: the result * @srcp: the input cpumask */ -static inline void cpumask_copy(struct cpumask *dstp, - const struct cpumask *srcp) +static __always_inline +void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } @@ -858,8 +870,8 @@ static inline void cpumask_copy(struct cpumask *dstp, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); } @@ -872,8 +884,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parselist_user(const char __user *buf, int len, - struct cpumask *dstp) +static __always_inline +int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); @@ -886,7 +898,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, * * Return: -errno, or 0 for success. */ -static inline int cpumask_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpumask_parse(const char *buf, struct cpumask *dstp) { return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits); } @@ -898,7 +910,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) * * Return: -errno, or 0 for success. */ -static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +static __always_inline int cpulist_parse(const char *buf, struct cpumask *dstp) { return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); } @@ -908,7 +920,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) * * Return: size to allocate for a &struct cpumask in bytes */ -static inline unsigned int cpumask_size(void) +static __always_inline unsigned int cpumask_size(void) { return bitmap_size(large_cpumask_bits); } @@ -920,7 +932,7 @@ static inline unsigned int cpumask_size(void) bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); -static inline +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); @@ -938,13 +950,13 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) * * Return: %true if allocation succeeded, %false if not */ -static inline +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE); } -static inline +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return alloc_cpumask_var(mask, flags | __GFP_ZERO); @@ -954,7 +966,7 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return mask != NULL; } @@ -964,43 +976,43 @@ static inline bool cpumask_available(cpumask_var_t mask) #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly -static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; } -static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { return true; } -static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { cpumask_clear(*mask); return true; } -static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, +static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { cpumask_clear(*mask); return true; } -static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +static __always_inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } -static inline void free_cpumask_var(cpumask_var_t mask) +static __always_inline void free_cpumask_var(cpumask_var_t mask) { } -static inline void free_bootmem_cpumask_var(cpumask_var_t mask) +static __always_inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } -static inline bool cpumask_available(cpumask_var_t mask) +static __always_inline bool cpumask_available(cpumask_var_t mask) { return true; } @@ -1058,7 +1070,7 @@ void set_cpu_online(unsigned int cpu, bool online); ((struct cpumask *)(1 ? (bitmap) \ : (void *)sizeof(__check_is_bitmap(bitmap)))) -static inline int __check_is_bitmap(const unsigned long *bitmap) +static __always_inline int __check_is_bitmap(const unsigned long *bitmap) { return 1; } @@ -1073,7 +1085,7 @@ static inline int __check_is_bitmap(const unsigned long *bitmap) extern const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)]; -static inline const struct cpumask *get_cpu_mask(unsigned int cpu) +static __always_inline const struct cpumask *get_cpu_mask(unsigned int cpu) { const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG]; p -= cpu / BITS_PER_LONG; @@ -1100,32 +1112,32 @@ static __always_inline unsigned int num_online_cpus(void) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_online_mask); } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_enabled_mask); } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_present_mask); } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_active_mask); } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_dying_mask); } @@ -1138,32 +1150,32 @@ static inline bool cpu_dying(unsigned int cpu) #define num_present_cpus() 1U #define num_active_cpus() 1U -static inline bool cpu_online(unsigned int cpu) +static __always_inline bool cpu_online(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_possible(unsigned int cpu) +static __always_inline bool cpu_possible(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_enabled(unsigned int cpu) +static __always_inline bool cpu_enabled(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_present(unsigned int cpu) +static __always_inline bool cpu_present(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_active(unsigned int cpu) +static __always_inline bool cpu_active(unsigned int cpu) { return cpu == 0; } -static inline bool cpu_dying(unsigned int cpu) +static __always_inline bool cpu_dying(unsigned int cpu) { return false; } @@ -1197,7 +1209,7 @@ static inline bool cpu_dying(unsigned int cpu) * Return: the length of the (null-terminated) @buf string, zero if * nothing is copied. */ -static inline ssize_t +static __always_inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), @@ -1220,9 +1232,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; @@ -1242,9 +1254,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask, * Return: the length of how many bytes have been copied, excluding * terminating '\0'. */ -static inline ssize_t -cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, - loff_t off, size_t count) +static __always_inline +ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, + loff_t off, size_t count) { return bitmap_print_list_to_buf(buf, cpumask_bits(mask), nr_cpu_ids, off, count) - 1; diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index de4cf0ee96f7..835e7b793f6a 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -99,6 +99,7 @@ static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, const struct task_struct *tsk2); +#ifdef CONFIG_CPUSETS_V1 #define cpuset_memory_pressure_bump() \ do { \ if (cpuset_memory_pressure_enabled) \ @@ -106,6 +107,9 @@ extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, } while (0) extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); +#else +static inline void cpuset_memory_pressure_bump(void) { } +#endif extern void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task); @@ -113,7 +117,6 @@ extern int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); extern int cpuset_mem_spread_node(void); -extern int cpuset_slab_spread_node(void); static inline int cpuset_do_page_mem_spread(void) { @@ -246,11 +249,6 @@ static inline int cpuset_mem_spread_node(void) return 0; } -static inline int cpuset_slab_spread_node(void) -{ - return 0; -} - static inline int cpuset_do_page_mem_spread(void) { return 0; diff --git a/include/linux/cred.h b/include/linux/cred.h index 2976f534a7a3..e4a3155fe409 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -172,6 +172,24 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } +/* + * Override creds without bumping reference count. Caller must ensure + * reference remains valid or has taken reference. Almost always not the + * interface you want. Use override_creds()/revert_creds() instead. + */ +static inline const struct cred *override_creds_light(const struct cred *override_cred) +{ + const struct cred *old = current->cred; + + rcu_assign_pointer(current->cred, override_cred); + return old; +} + +static inline void revert_creds_light(const struct cred *revert_cred) +{ + rcu_assign_pointer(current->cred, revert_cred); +} + /** * get_new_cred_many - Get references on a new set of credentials * @cred: The new credentials to reference diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h deleted file mode 100644 index 0bea1afbd747..000000000000 --- a/include/linux/cxl-event.h +++ /dev/null @@ -1,175 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2023 Intel Corporation. */ -#ifndef _LINUX_CXL_EVENT_H -#define _LINUX_CXL_EVENT_H - -#include <linux/types.h> -#include <linux/uuid.h> -#include <linux/workqueue_types.h> - -/* - * Common Event Record Format - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_hdr { - u8 length; - u8 flags[3]; - __le16 handle; - __le16 related_handle; - __le64 timestamp; - u8 maint_op_class; - u8 reserved[15]; -} __packed; - -struct cxl_event_media_hdr { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - /* - * The meaning of Validity Flags from bit 2 is - * different across DRAM and General Media records - */ - u8 validity_flags[2]; - u8 channel; - u8 rank; -} __packed; - -#define CXL_EVENT_RECORD_DATA_LENGTH 0x50 -struct cxl_event_generic { - struct cxl_event_record_hdr hdr; - u8 data[CXL_EVENT_RECORD_DATA_LENGTH]; -} __packed; - -/* - * General Media Event Record - * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43 - */ -#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 -struct cxl_event_gen_media { - struct cxl_event_media_hdr media_hdr; - u8 device[3]; - u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; - u8 reserved[46]; -} __packed; - -/* - * DRAM Event Record - DER - * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44 - */ -#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 -struct cxl_event_dram { - struct cxl_event_media_hdr media_hdr; - u8 nibble_mask[3]; - u8 bank_group; - u8 bank; - u8 row[3]; - u8 column[2]; - u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE]; - u8 reserved[0x17]; -} __packed; - -/* - * Get Health Info Record - * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100 - */ -struct cxl_get_health_info { - u8 health_status; - u8 media_status; - u8 add_status; - u8 life_used; - u8 device_temp[2]; - u8 dirty_shutdown_cnt[4]; - u8 cor_vol_err_cnt[4]; - u8 cor_per_err_cnt[4]; -} __packed; - -/* - * Memory Module Event Record - * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45 - */ -struct cxl_event_mem_module { - struct cxl_event_record_hdr hdr; - u8 event_type; - struct cxl_get_health_info info; - u8 reserved[0x3d]; -} __packed; - -union cxl_event { - struct cxl_event_generic generic; - struct cxl_event_gen_media gen_media; - struct cxl_event_dram dram; - struct cxl_event_mem_module mem_module; - /* dram & gen_media event header */ - struct cxl_event_media_hdr media_hdr; -} __packed; - -/* - * Common Event Record Format; in event logs - * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 - */ -struct cxl_event_record_raw { - uuid_t id; - union cxl_event event; -} __packed; - -enum cxl_event_type { - CXL_CPER_EVENT_GENERIC, - CXL_CPER_EVENT_GEN_MEDIA, - CXL_CPER_EVENT_DRAM, - CXL_CPER_EVENT_MEM_MODULE, -}; - -#define CPER_CXL_DEVICE_ID_VALID BIT(0) -#define CPER_CXL_DEVICE_SN_VALID BIT(1) -#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2) -struct cxl_cper_event_rec { - struct { - u32 length; - u64 validation_bits; - struct cper_cxl_event_devid { - u16 vendor_id; - u16 device_id; - u8 func_num; - u8 device_num; - u8 bus_num; - u16 segment_num; - u16 slot_num; /* bits 2:0 reserved */ - u8 reserved; - } __packed device_id; - struct cper_cxl_event_sn { - u32 lower_dw; - u32 upper_dw; - } __packed dev_serial_num; - } __packed hdr; - - union cxl_event event; -} __packed; - -struct cxl_cper_work_data { - enum cxl_event_type event_type; - struct cxl_cper_event_rec rec; -}; - -#ifdef CONFIG_ACPI_APEI_GHES -int cxl_cper_register_work(struct work_struct *work); -int cxl_cper_unregister_work(struct work_struct *work); -int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); -#else -static inline int cxl_cper_register_work(struct work_struct *work) -{ - return 0; -} - -static inline int cxl_cper_unregister_work(struct work_struct *work) -{ - return 0; -} -static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) -{ - return 0; -} -#endif - -#endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index 27c546bfc6d4..a67f2c4940e9 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -233,7 +233,6 @@ struct damos_quota { unsigned long charge_addr_from; /* For prioritization */ - unsigned long histogram[DAMOS_MAX_SCORE + 1]; unsigned int min_score; /* For feedback loop */ @@ -630,6 +629,8 @@ struct damon_ctx { unsigned long next_ops_update_sis; /* for waiting until the execution of the kdamond_fn is started */ struct completion kdamond_started; + /* for scheme quotas prioritization */ + unsigned long *regions_score_histogram; /* public: */ struct task_struct *kdamond; diff --git a/include/linux/dax.h b/include/linux/dax.h index 9d3e3327af4c..df41a0017b31 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -27,12 +27,6 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, enum dax_access_mode, void **, pfn_t *); - /* - * Validate whether this device is usable as an fsdax backing - * device. - */ - bool (*dax_supported)(struct dax_device *, struct block_device *, int, - sector_t, sector_t); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); /* diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c9c65b132c0f..59444b495d49 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -57,7 +57,6 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ - .llseek = no_llseek, \ } #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ @@ -72,9 +71,63 @@ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_lookup(const char *name, struct dentry *parent); -struct dentry *debugfs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); +struct debugfs_short_fops { + ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); + ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); + loff_t (*llseek) (struct file *, loff_t, int); +}; + +struct dentry *debugfs_create_file_full(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); +struct dentry *debugfs_create_file_short(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct debugfs_short_fops *fops); + +/** + * debugfs_create_file - create a file in the debugfs filesystem + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the debugfs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.i_private pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations or struct debugfs_short_fops that + * should be used for this file. + * + * This is the basic "create a file" function for debugfs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you want + * to create a directory, the debugfs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the debugfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be + * returned. + * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. + * + * If fops points to a struct debugfs_short_fops, then simple_open() will be + * used for the open, and only read/write/llseek are supported and are proxied, + * so no module reference or release are needed. + * + * NOTE: it's expected that most callers should _ignore_ the errors returned + * by this function. Other debugfs functions handle the fact that the "dentry" + * passed to them could be an error and they don't crash in that case. + * Drivers should generally work fine even if debugfs fails to init anyway. + */ +#define debugfs_create_file(name, mode, parent, data, fops) \ + _Generic(fops, \ + const struct file_operations *: debugfs_create_file_full, \ + const struct debugfs_short_fops *: debugfs_create_file_short, \ + struct file_operations *: debugfs_create_file_full, \ + struct debugfs_short_fops *: debugfs_create_file_short) \ + (name, mode, parent, data, fops) + struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); @@ -208,7 +261,7 @@ static inline struct dentry *debugfs_lookup(const char *name, static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, - const struct file_operations *fops) + const void *fops) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 32444686b6ff..8b95545e7924 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -23,13 +23,17 @@ struct debug_obj_descr; * @state: tracked object state * @astate: current active state * @object: pointer to the real object + * @batch_last: pointer to the last hlist node in a batch * @descr: pointer to an object type specific debug description structure */ struct debug_obj { - struct hlist_node node; - enum debug_obj_state state; - unsigned int astate; - void *object; + struct hlist_node node; + enum debug_obj_state state; + unsigned int astate; + union { + void *object; + struct hlist_node *batch_last; + }; const struct debug_obj_descr *descr; }; diff --git a/include/linux/decompress/unxz.h b/include/linux/decompress/unxz.h index f764e2a7201e..3dd2658a9dab 100644 --- a/include/linux/decompress/unxz.h +++ b/include/linux/decompress/unxz.h @@ -1,10 +1,9 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd * * Author: Lasse Collin <lasse.collin@tukaani.org> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef DECOMPRESS_UNXZ_H diff --git a/include/linux/delay.h b/include/linux/delay.h index ff9cda975e30..89866bab100d 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -6,21 +6,12 @@ * Copyright (C) 1993 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_jiffy" value. - * - * Please note that ndelay(), udelay() and mdelay() may return early for - * several reasons: - * 1. computed loops_per_jiffy too low (due to the time taken to - * execute the timer interrupt.) - * 2. cache behaviour affecting the time it takes to execute the - * loop function. - * 3. CPU clock rate changes. - * - * Please see this thread: - * https://lists.openwall.net/linux-kernel/2011/01/09/56 + * Sleep routines using timer list timers or hrtimers. */ #include <linux/math.h> #include <linux/sched.h> +#include <linux/jiffies.h> extern unsigned long loops_per_jiffy; @@ -35,12 +26,21 @@ extern unsigned long loops_per_jiffy; * The 2nd mdelay() definition ensures GCC will optimize away the * while loop for the common cases where n <= MAX_UDELAY_MS -- Paul G. */ - #ifndef MAX_UDELAY_MS #define MAX_UDELAY_MS 5 #endif #ifndef mdelay +/** + * mdelay - Inserting a delay based on milliseconds with busy waiting + * @n: requested delay in milliseconds + * + * See udelay() for basic information about mdelay() and it's variants. + * + * Please double check, whether mdelay() is the right way to go or whether a + * refactoring of the code is the better variant to be able to use msleep() + * instead. + */ #define mdelay(n) (\ (__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? udelay((n)*1000) : \ ({unsigned long __ms=(n); while (__ms--) udelay(1000);})) @@ -63,30 +63,75 @@ unsigned long msleep_interruptible(unsigned int msecs); void usleep_range_state(unsigned long min, unsigned long max, unsigned int state); +/** + * usleep_range - Sleep for an approximate time + * @min: Minimum time in microseconds to sleep + * @max: Maximum time in microseconds to sleep + * + * For basic information please refere to usleep_range_state(). + * + * The task will be in the state TASK_UNINTERRUPTIBLE during the sleep. + */ static inline void usleep_range(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); } -static inline void usleep_idle_range(unsigned long min, unsigned long max) +/** + * usleep_range_idle - Sleep for an approximate time with idle time accounting + * @min: Minimum time in microseconds to sleep + * @max: Maximum time in microseconds to sleep + * + * For basic information please refere to usleep_range_state(). + * + * The sleeping task has the state TASK_IDLE during the sleep to prevent + * contribution to the load avarage. + */ +static inline void usleep_range_idle(unsigned long min, unsigned long max) { usleep_range_state(min, max, TASK_IDLE); } +/** + * ssleep - wrapper for seconds around msleep + * @seconds: Requested sleep duration in seconds + * + * Please refere to msleep() for detailed information. + */ static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); } -/* see Documentation/timers/timers-howto.rst for the thresholds */ +static const unsigned int max_slack_shift = 2; +#define USLEEP_RANGE_UPPER_BOUND ((TICK_NSEC << max_slack_shift) / NSEC_PER_USEC) + +/** + * fsleep - flexible sleep which autoselects the best mechanism + * @usecs: requested sleep duration in microseconds + * + * flseep() selects the best mechanism that will provide maximum 25% slack + * to the requested sleep duration. Therefore it uses: + * + * * udelay() loop for sleep durations <= 10 microseconds to avoid hrtimer + * overhead for really short sleep durations. + * * usleep_range() for sleep durations which would lead with the usage of + * msleep() to a slack larger than 25%. This depends on the granularity of + * jiffies. + * * msleep() for all other sleep durations. + * + * Note: When %CONFIG_HIGH_RES_TIMERS is not set, all sleeps are processed with + * the granularity of jiffies and the slack might exceed 25% especially for + * short sleep durations. + */ static inline void fsleep(unsigned long usecs) { if (usecs <= 10) udelay(usecs); - else if (usecs <= 20000) - usleep_range(usecs, 2 * usecs); + else if (usecs < USLEEP_RANGE_UPPER_BOUND) + usleep_range(usecs, usecs + (usecs >> max_slack_shift)); else - msleep(DIV_ROUND_UP(usecs, 1000)); + msleep(DIV_ROUND_UP(usecs, USEC_PER_MSEC)); } #endif /* defined(_LINUX_DELAY_H) */ diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index ca32b5bb28eb..eb2094e43050 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -276,6 +276,7 @@ do { \ dev_driver_string(dev), dev_name(dev), ## arg) __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +__printf(3, 4) int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...); /* Simple helper for dev_err_probe() when ERR_PTR() is to be returned. */ #define dev_err_ptr_probe(dev, ___err, fmt, ...) \ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 53ca3a913d06..8321f65897f3 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); -union map_info *dm_get_rq_mapinfo(struct request *rq); #ifdef CONFIG_BLK_DEV_ZONED struct dm_report_zones_args { diff --git a/include/linux/device.h b/include/linux/device.h index 34eb20f5966f..667cb6db9019 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -707,6 +707,8 @@ struct device_physical_location { * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. + * @dma_iommu: Device is using default IOMMU implementation for DMA and + * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -748,7 +750,7 @@ struct device { struct dev_pin_info *pins; #endif struct dev_msi_info msi; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS const struct dma_map_ops *dma_ops; #endif u64 *dma_mask; /* dma mask (if dma'able device) */ @@ -822,6 +824,9 @@ struct device { #ifdef CONFIG_DMA_NEED_SYNC bool dma_skip_sync:1; #endif +#ifdef CONFIG_IOMMU_DMA + bool dma_iommu:1; +#endif }; /** @@ -1073,6 +1078,9 @@ int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); int device_for_each_child_reverse(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); +int device_for_each_child_reverse_from(struct device *parent, + struct device *from, const void *data, + int (*fn)(struct device *, const void *)); struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); struct device *device_find_child_by_name(struct device *parent, diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 807831d6bf0f..cdc4757217f9 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -126,6 +126,9 @@ struct bus_attribute { int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr); +/* Matching function type for drivers/base APIs to find a specific device */ +typedef int (*device_match_t)(struct device *dev, const void *data); + /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); int device_match_of_node(struct device *dev, const void *np); @@ -139,8 +142,7 @@ int device_match_any(struct device *dev, const void *unused); int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(const struct bus_type *bus, struct device *start, - const void *data, - int (*match)(struct device *dev, const void *data)); + const void *data, device_match_t match); /** * bus_find_device_by_name - device iterator for locating a particular device * of a specific name. diff --git a/include/linux/device/class.h b/include/linux/device/class.h index c576b49c55c2..518c9c83d64b 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -95,7 +95,7 @@ void class_dev_iter_exit(struct class_dev_iter *iter); int class_for_each_device(const struct class *class, const struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *class_find_device(const struct class *class, const struct device *start, - const void *data, int (*match)(struct device *, const void *)); + const void *data, device_match_t match); /** * class_find_device_by_name - device iterator for locating a particular device diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 1fc8b68786de..5c04b8e3833b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -157,7 +157,7 @@ int __must_check driver_for_each_device(struct device_driver *drv, struct device void *data, int (*fn)(struct device *dev, void *)); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, - int (*match)(struct device *dev, const void *data)); + device_match_t match); /** * driver_find_device_by_name - device iterator for locating a particular device diff --git a/include/linux/dim.h b/include/linux/dim.h index 1b581ff25a15..06543fd40fcc 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -351,7 +351,8 @@ void dim_park_tired(struct dim *dim); * Takes into consideration counter wrap-around. * Returned boolean indicates whether curr_stats are reliable. */ -bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end, +bool dim_calc_stats(const struct dim_sample *start, + const struct dim_sample *end, struct dim_stats *curr_stats); /** @@ -424,7 +425,7 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode); * This is the main logic of the algorithm, where data is processed in order * to decide on next required action. */ -void net_dim(struct dim *dim, struct dim_sample end_sample); +void net_dim(struct dim *dim, const struct dim_sample *end_sample); /* RDMA DIM */ diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index edbe13d00776..d7e30d4f7503 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -12,7 +12,7 @@ #include <linux/mem_encrypt.h> #include <linux/swiotlb.h> -extern unsigned int zone_dma_bits; +extern u64 zone_dma_limit; /* * Record the mapping of CPU physical to DMA addresses for a given region. diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 29c5650c1038..079b3dec0a16 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -79,6 +79,12 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) +struct dma_fence_array *dma_fence_array_alloc(int num_fences); +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any); + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index e06bad467f55..e7ad819962e3 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -574,6 +574,12 @@ int dma_fence_get_status(struct dma_fence *fence); * rather than success. This must be set before signaling (so that the value * is visible before any waiters on the signal callback are woken). This * helper exists to help catching erroneous setting of #dma_fence.error. + * + * Examples of error codes which drivers should use: + * + * * %-ENODATA This operation produced no data, no other operation affected. + * * %-ECANCELED All operations from the same context have been canceled. + * * %-ETIME Operation caused a timeout and potentially device reset. */ static inline void dma_fence_set_error(struct dma_fence *fence, int error) diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 064bad725061..27d15f60950a 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -9,14 +9,13 @@ #ifndef _DMA_HEAPS_H #define _DMA_HEAPS_H -#include <linux/cdev.h> #include <linux/types.h> struct dma_heap; /** * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return struct dma_buf ptr + * @allocate: allocate dmabuf and return struct dma_buf ptr * * allocate returns dmabuf on success, ERR_PTR(-errno) on error. */ @@ -41,28 +40,10 @@ struct dma_heap_export_info { void *priv; }; -/** - * dma_heap_get_drvdata() - get per-heap driver data - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-heap data for the heap. - */ void *dma_heap_get_drvdata(struct dma_heap *heap); -/** - * dma_heap_get_name() - get heap name - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The char* for the heap name. - */ const char *dma_heap_get_name(struct dma_heap *heap); -/** - * dma_heap_add - adds a heap to dmabuf heaps - * @exp_info: information needed to register this heap - */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); #endif /* _DMA_HEAPS_H */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 02a1c825896b..e172522cd936 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -13,20 +13,7 @@ struct cma; struct iommu_ops; -/* - * Values for struct dma_map_ops.flags: - * - * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can - * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. - * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is - * coherent and it's not an SWIOTLB buffer. - */ -#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) -#define DMA_F_CAN_SKIP_SYNC (1 << 1) - struct dma_map_ops { - unsigned int flags; - void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); @@ -37,11 +24,6 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); - struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size, - enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs); - void (*free_noncontiguous)(struct device *dev, size_t size, - struct sg_table *sgt, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -88,7 +70,7 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS #include <asm/dma-mapping.h> static inline const struct dma_map_ops *get_dma_ops(struct device *dev) @@ -103,7 +85,7 @@ static inline void set_dma_ops(struct device *dev, { dev->dma_ops = dma_ops; } -#else /* CONFIG_DMA_OPS */ +#else /* CONFIG_ARCH_HAS_DMA_OPS */ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return NULL; @@ -112,7 +94,7 @@ static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *dma_ops) { } -#endif /* CONFIG_DMA_OPS */ +#endif /* CONFIG_ARCH_HAS_DMA_OPS */ #ifdef CONFIG_DMA_CMA extern struct cma *dma_contiguous_default_area; @@ -219,20 +201,6 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_GLOBAL_POOL */ -/* - * This is the actual return value from the ->alloc_noncontiguous method. - * The users of the DMA API should only care about the sg_table, but to make - * the DMA-API internal vmaping and freeing easier we stash away the page - * array as well (except for the fallback case). This can go away any time, - * e.g. when a vmap-variant that takes a scatterlist comes along. - */ -struct dma_sgt_handle { - struct sg_table sgt; - struct page **pages; -}; -#define sgt_handle(sgt) \ - container_of((sgt), struct dma_sgt_handle, sgt) - int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); @@ -274,7 +242,7 @@ static inline bool dev_is_dma_coherent(struct device *dev) { return true; } -#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +#endif static inline void dma_reset_need_sync(struct device *dev) { diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f693aafe221f..b79925b1c433 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -2,15 +2,11 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H -#include <linux/cache.h> -#include <linux/sizes.h> -#include <linux/string.h> #include <linux/device.h> #include <linux/err.h> #include <linux/dma-direction.h> #include <linux/scatterlist.h> #include <linux/bug.h> -#include <linux/mem_encrypt.h> /** * List of possible attributes associated with a DMA mapping. The semantics @@ -524,13 +520,11 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) +static inline void dma_set_max_seg_size(struct device *dev, unsigned int size) { - if (dev->dma_parms) { - dev->dma_parms->max_segment_size = size; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->max_segment_size = size; } static inline unsigned long dma_get_seg_boundary(struct device *dev) @@ -559,13 +553,11 @@ static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, return (dma_get_seg_boundary(dev) >> page_shift) + 1; } -static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) +static inline void dma_set_seg_boundary(struct device *dev, unsigned long mask) { - if (dev->dma_parms) { - dev->dma_parms->segment_boundary_mask = mask; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->segment_boundary_mask = mask; } static inline unsigned int dma_get_min_align_mask(struct device *dev) @@ -575,13 +567,12 @@ static inline unsigned int dma_get_min_align_mask(struct device *dev) return 0; } -static inline int dma_set_min_align_mask(struct device *dev, +static inline void dma_set_min_align_mask(struct device *dev, unsigned int min_align_mask) { if (WARN_ON_ONCE(!dev->dma_parms)) - return -EIO; + return; dev->dma_parms->min_align_mask = min_align_mask; - return 0; } #ifndef dma_get_cache_alignment diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 8d0e34dad446..c5ab6fd9ebe8 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -105,10 +105,10 @@ enum dma_resv_usage { * This should be used by submissions which don't want to participate in * any implicit synchronization. * - * The most common case are preemption fences, page table updates, TLB - * flushes as well as explicit synced user submissions. + * The most common cases are preemption fences, page table updates, TLB + * flushes as well as explicitly synced user submissions. * - * Explicit synced user user submissions can be promoted to + * Explicitly synced user submissions can be promoted to * DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE as needed using * dma_buf_import_sync_file() when implicit synchronization should * become necessary after initial adding of the fence. diff --git a/include/linux/dma/ipu-dma.h b/include/linux/dma/ipu-dma.h deleted file mode 100644 index 6969391580d2..000000000000 --- a/include/linux/dma/ipu-dma.h +++ /dev/null @@ -1,174 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 - * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de> - * - * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. - */ - -#ifndef __LINUX_DMA_IPU_DMA_H -#define __LINUX_DMA_IPU_DMA_H - -#include <linux/types.h> -#include <linux/dmaengine.h> - -/* IPU DMA Controller channel definitions. */ -enum ipu_channel { - IDMAC_IC_0 = 0, /* IC (encoding task) to memory */ - IDMAC_IC_1 = 1, /* IC (viewfinder task) to memory */ - IDMAC_ADC_0 = 1, - IDMAC_IC_2 = 2, - IDMAC_ADC_1 = 2, - IDMAC_IC_3 = 3, - IDMAC_IC_4 = 4, - IDMAC_IC_5 = 5, - IDMAC_IC_6 = 6, - IDMAC_IC_7 = 7, /* IC (sensor data) to memory */ - IDMAC_IC_8 = 8, - IDMAC_IC_9 = 9, - IDMAC_IC_10 = 10, - IDMAC_IC_11 = 11, - IDMAC_IC_12 = 12, - IDMAC_IC_13 = 13, - IDMAC_SDC_0 = 14, /* Background synchronous display data */ - IDMAC_SDC_1 = 15, /* Foreground data (overlay) */ - IDMAC_SDC_2 = 16, - IDMAC_SDC_3 = 17, - IDMAC_ADC_2 = 18, - IDMAC_ADC_3 = 19, - IDMAC_ADC_4 = 20, - IDMAC_ADC_5 = 21, - IDMAC_ADC_6 = 22, - IDMAC_ADC_7 = 23, - IDMAC_PF_0 = 24, - IDMAC_PF_1 = 25, - IDMAC_PF_2 = 26, - IDMAC_PF_3 = 27, - IDMAC_PF_4 = 28, - IDMAC_PF_5 = 29, - IDMAC_PF_6 = 30, - IDMAC_PF_7 = 31, -}; - -/* Order significant! */ -enum ipu_channel_status { - IPU_CHANNEL_FREE, - IPU_CHANNEL_INITIALIZED, - IPU_CHANNEL_READY, - IPU_CHANNEL_ENABLED, -}; - -#define IPU_CHANNELS_NUM 32 - -enum pixel_fmt { - /* 1 byte */ - IPU_PIX_FMT_GENERIC, - IPU_PIX_FMT_RGB332, - IPU_PIX_FMT_YUV420P, - IPU_PIX_FMT_YUV422P, - IPU_PIX_FMT_YUV420P2, - IPU_PIX_FMT_YVU422P, - /* 2 bytes */ - IPU_PIX_FMT_RGB565, - IPU_PIX_FMT_RGB666, - IPU_PIX_FMT_BGR666, - IPU_PIX_FMT_YUYV, - IPU_PIX_FMT_UYVY, - /* 3 bytes */ - IPU_PIX_FMT_RGB24, - IPU_PIX_FMT_BGR24, - /* 4 bytes */ - IPU_PIX_FMT_GENERIC_32, - IPU_PIX_FMT_RGB32, - IPU_PIX_FMT_BGR32, - IPU_PIX_FMT_ABGR32, - IPU_PIX_FMT_BGRA32, - IPU_PIX_FMT_RGBA32, -}; - -enum ipu_color_space { - IPU_COLORSPACE_RGB, - IPU_COLORSPACE_YCBCR, - IPU_COLORSPACE_YUV -}; - -/* - * Enumeration of IPU rotation modes - */ -enum ipu_rotate_mode { - /* Note the enum values correspond to BAM value */ - IPU_ROTATE_NONE = 0, - IPU_ROTATE_VERT_FLIP = 1, - IPU_ROTATE_HORIZ_FLIP = 2, - IPU_ROTATE_180 = 3, - IPU_ROTATE_90_RIGHT = 4, - IPU_ROTATE_90_RIGHT_VFLIP = 5, - IPU_ROTATE_90_RIGHT_HFLIP = 6, - IPU_ROTATE_90_LEFT = 7, -}; - -/* - * Enumeration of DI ports for ADC. - */ -enum display_port { - DISP0, - DISP1, - DISP2, - DISP3 -}; - -struct idmac_video_param { - unsigned short in_width; - unsigned short in_height; - uint32_t in_pixel_fmt; - unsigned short out_width; - unsigned short out_height; - uint32_t out_pixel_fmt; - unsigned short out_stride; - bool graphics_combine_en; - bool global_alpha_en; - bool key_color_en; - enum display_port disp; - unsigned short out_left; - unsigned short out_top; -}; - -/* - * Union of initialization parameters for a logical channel. So far only video - * parameters are used. - */ -union ipu_channel_param { - struct idmac_video_param video; -}; - -struct idmac_tx_desc { - struct dma_async_tx_descriptor txd; - struct scatterlist *sg; /* scatterlist for this */ - unsigned int sg_len; /* tx-descriptor. */ - struct list_head list; -}; - -struct idmac_channel { - struct dma_chan dma_chan; - dma_cookie_t completed; /* last completed cookie */ - union ipu_channel_param params; - enum ipu_channel link; /* input channel, linked to the output */ - enum ipu_channel_status status; - void *client; /* Only one client per channel */ - unsigned int n_tx_desc; - struct idmac_tx_desc *desc; /* allocated tx-descriptors */ - struct scatterlist *sg[2]; /* scatterlist elements in buffer-0 and -1 */ - struct list_head free_list; /* free tx-descriptors */ - struct list_head queue; /* queued tx-descriptors */ - spinlock_t lock; /* protects sg[0,1], queue */ - struct mutex chan_mutex; /* protects status, cookie, free_list */ - bool sec_chan_en; - int active_buffer; - unsigned int eof_irq; - char eof_name[16]; /* EOF IRQ name for request_irq() */ -}; - -#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd) -#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan) - -#endif /* __LINUX_DMA_IPU_DMA_H */ diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 1e491c5dcac2..2dea217629d0 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -136,8 +136,6 @@ u32 k3_udma_glue_rx_flow_get_fdq_id(struct k3_udma_glue_rx_channel *rx_chn, u32 k3_udma_glue_rx_get_flow_id_base(struct k3_udma_glue_rx_channel *rx_chn); int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num); -void k3_udma_glue_rx_put_irq(struct k3_udma_glue_rx_channel *rx_chn, - u32 flow_num); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, void (*cleanup)(void *data, dma_addr_t desc_dma), diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 499bb2c63483..692b2b445761 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -292,7 +292,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src) struct irq_data; extern void dmar_msi_unmask(struct irq_data *data); extern void dmar_msi_mask(struct irq_data *data); -extern void dmar_msi_read(int irq, struct msi_msg *msg); extern void dmar_msi_write(int irq, struct msi_msg *msg); extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); diff --git a/include/linux/dpll.h b/include/linux/dpll.h index d275736230b3..5e4f9ab1cf75 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -15,6 +15,7 @@ struct dpll_device; struct dpll_pin; +struct dpll_pin_esync; struct dpll_device_ops { int (*mode_get)(const struct dpll_device *dpll, void *dpll_priv, @@ -25,6 +26,10 @@ struct dpll_device_ops { struct netlink_ext_ack *extack); int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, s32 *temp, struct netlink_ext_ack *extack); + int (*clock_quality_level_get)(const struct dpll_device *dpll, + void *dpll_priv, + unsigned long *qls, + struct netlink_ext_ack *extack); }; struct dpll_pin_ops { @@ -83,6 +88,13 @@ struct dpll_pin_ops { int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, const struct dpll_device *dpll, void *dpll_priv, s64 *ffo, struct netlink_ext_ack *extack); + int (*esync_set)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + u64 freq, struct netlink_ext_ack *extack); + int (*esync_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + struct dpll_pin_esync *esync, + struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { @@ -111,6 +123,13 @@ struct dpll_pin_phase_adjust_range { s32 max; }; +struct dpll_pin_esync { + u64 freq; + const struct dpll_pin_frequency *range; + u8 range_num; + u8 pulse; +}; + struct dpll_pin_properties { const char *board_label; const char *panel_label; diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index 6fbfbde68a37..620a3260fc08 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -15,6 +15,7 @@ struct ocelot_skb_cb { struct sk_buff *clone; unsigned int ptp_class; /* valid only for clones */ + unsigned long ptp_tx_time; /* valid only for clones */ u32 tstamp_lo; u8 ptp_cmd; u8 ts_id; diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h index 82ebf9223948..f8811c46b89e 100644 --- a/include/linux/dw_apb_timer.h +++ b/include/linux/dw_apb_timer.h @@ -34,9 +34,6 @@ struct dw_apb_clocksource { }; void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced); -void dw_apb_clockevent_pause(struct dw_apb_clock_event_device *dw_ced); -void dw_apb_clockevent_resume(struct dw_apb_clock_event_device *dw_ced); -void dw_apb_clockevent_stop(struct dw_apb_clock_event_device *dw_ced); struct dw_apb_clock_event_device * dw_apb_clockevent_init(int cpu, const char *name, unsigned rating, diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 281298e77a15..808b1a5102e7 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -127,7 +127,7 @@ static inline void dql_queued(struct dql *dql, unsigned int count) if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) return; - dql->last_obj_cnt = count; + WRITE_ONCE(dql->last_obj_cnt, count); /* We want to force a write first, so that cpu do not attempt * to get cache line containing last_obj_cnt, num_queued, adj_limit diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h index c860c72a921d..3a485cc0e0fa 100644 --- a/include/linux/eeprom_93cx6.h +++ b/include/linux/eeprom_93cx6.h @@ -11,6 +11,8 @@ Supported chipsets: 93c46, 93c56 and 93c66. */ +#include <linux/bits.h> + /* * EEPROM operation defines. */ @@ -34,6 +36,7 @@ * @register_write(struct eeprom_93cx6 *eeprom): handler to * write to the eeprom register by using all reg_* fields. * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines + * @quirks: eeprom or controller quirks * @drive_data: Set if we're driving the data line. * @reg_data_in: register field to indicate data input * @reg_data_out: register field to indicate data output @@ -50,6 +53,9 @@ struct eeprom_93cx6 { void (*register_write)(struct eeprom_93cx6 *eeprom); int width; + unsigned int quirks; +/* Some EEPROMs require an extra clock cycle before reading */ +#define PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE BIT(0) char drive_data; char reg_data_in; @@ -71,3 +77,8 @@ extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable); extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data); + +static inline bool has_quirk_extra_read_cycle(struct eeprom_93cx6 *eeprom) +{ + return eeprom->quirks & PCI_EEPROM_QUIRK_EXTRA_READ_CYCLE; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 6bf3c4fe8511..e5815867aba9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -379,7 +379,6 @@ void efi_native_runtime_setup(void); #define EFI_SYSTEM_RESOURCE_TABLE_GUID EFI_GUID(0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) #define EFI_FILE_SYSTEM_GUID EFI_GUID(0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define DEVICE_TREE_GUID EFI_GUID(0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0) -#define EFI_PROPERTIES_TABLE_GUID EFI_GUID(0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5) #define EFI_RNG_PROTOCOL_GUID EFI_GUID(0x3152bca5, 0xeade, 0x433d, 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44) #define EFI_RNG_ALGORITHM_RAW EFI_GUID(0xe43176d7, 0xb6e8, 0x4827, 0xb7, 0x84, 0x7f, 0xfd, 0xc4, 0xb6, 0x85, 0x61) #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) @@ -581,15 +580,6 @@ struct efi_mem_range { }; typedef struct { - u32 version; - u32 length; - u64 memory_protection_attribute; -} efi_properties_table_t; - -#define EFI_PROPERTIES_TABLE_VERSION 0x00010000 -#define EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA 0x1 - -typedef struct { u16 version; u16 length; u32 runtime_services_supported; @@ -764,8 +754,6 @@ extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); -extern void efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource, struct resource *bss_resource); extern u64 efi_get_fdt_params(struct efi_memory_map_data *data); extern struct kobject *efi_kobj; @@ -873,10 +861,9 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) #define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ #define EFI_ARCH_1 7 /* First arch-specific bit */ #define EFI_DBG 8 /* Print additional debug info at runtime */ -#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ -#define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ -#define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ -#define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ +#define EFI_MEM_ATTR 9 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ +#define EFI_MEM_NO_SOFT_RESERVE 10 /* Is the kernel configured to ignore soft reservations? */ +#define EFI_PRESERVE_BS_REGIONS 11 /* Are EFI boot-services memory segments available? */ #ifdef CONFIG_EFI /* diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h deleted file mode 100644 index 624ff6ff41f9..000000000000 --- a/include/linux/einj-cxl.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL protocol Error INJection support. - * - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Author: Ben Cheatham <benjamin.cheatham@amd.com> - */ -#ifndef EINJ_CXL_H -#define EINJ_CXL_H - -#include <linux/errno.h> -#include <linux/types.h> - -struct pci_dev; -struct seq_file; - -#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) -int einj_cxl_available_error_type_show(struct seq_file *m, void *v); -int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); -int einj_cxl_inject_rch_error(u64 rcrb, u64 type); -bool einj_cxl_is_initialized(void); -#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ -static inline int einj_cxl_available_error_type_show(struct seq_file *m, - void *v) -{ - return -ENXIO; -} - -static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) -{ - return -ENXIO; -} - -static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) -{ - return -ENXIO; -} - -static inline bool einj_cxl_is_initialized(void) { return false; } -#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ - -#endif /* EINJ_CXL_H */ diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 1ff52020cf75..752e0b297582 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -55,6 +55,8 @@ struct em_perf_table { * struct em_perf_domain - Performance domain * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states + * @min_perf_state: Minimum allowed Performance State index + * @max_perf_state: Maximum allowed Performance State index * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here * for performance reasons to avoid potential cache @@ -70,6 +72,8 @@ struct em_perf_table { struct em_perf_domain { struct em_perf_table __rcu *em_table; int nr_perf_states; + int min_perf_state; + int max_perf_state; unsigned long flags; unsigned long cpus[]; }; @@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table); int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int nr_states); int em_dev_update_chip_binning(struct device *dev); +int em_update_performance_limits(struct em_perf_domain *pd, + unsigned long freq_min_khz, unsigned long freq_max_khz); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM * @table: List of performance states, in ascending order - * @nr_perf_states: Number of performance states + * @pd: performance domain for which this must be done * @max_util: Max utilization to map with the EM - * @pd_flags: Performance Domain flags * * It is called from the scheduler code quite frequently and as a consequence * doesn't implement any check. @@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev); * requirement. */ static inline int -em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, - unsigned long max_util, unsigned long pd_flags) +em_pd_get_efficient_state(struct em_perf_state *table, + struct em_perf_domain *pd, unsigned long max_util) { + unsigned long pd_flags = pd->flags; + int min_ps = pd->min_perf_state; + int max_ps = pd->max_perf_state; struct em_perf_state *ps; int i; - for (i = 0; i < nr_perf_states; i++) { + for (i = min_ps; i <= max_ps; i++) { ps = &table[i]; if (ps->performance >= max_util) { if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && @@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, } } - return nr_perf_states - 1; + return max_ps; } /** @@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * requested performance. */ em_table = rcu_dereference(pd->em_table); - i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states, - max_util, pd->flags); + i = em_pd_get_efficient_state(em_table->state, pd, max_util); ps = &em_table->state[i]; /* @@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev) { return -EINVAL; } +static inline +int em_update_performance_limits(struct em_perf_domain *pd, + unsigned long freq_min_khz, unsigned long freq_max_khz) +{ + return -EINVAL; +} #endif #endif diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index b0fb775a600d..fc61d0205c97 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -64,7 +64,8 @@ #define EXIT_TO_USER_MODE_WORK \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ + _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) /** @@ -108,7 +109,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(__ct_state() != CONTEXT_USER); + CT_WARN_ON(__ct_state() != CT_STATE_USER); user_exit_irqoff(); instrumentation_begin(); diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 6813171afccb..16149f6625e4 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -17,8 +17,9 @@ #endif #define XFER_TO_GUEST_MODE_WORK \ - (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \ - _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK) + (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \ + _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \ + ARCH_XFER_TO_GUEST_MODE_WORK) struct kvm_vcpu; diff --git a/include/linux/err.h b/include/linux/err.h index b5d9bb2a2349..a4dacd745fcf 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -41,6 +41,9 @@ static inline void * __must_check ERR_PTR(long error) return (void *) error; } +/* Return the pointer in the percpu address space. */ +#define ERR_PTR_PCPU(error) ((void __percpu *)(unsigned long)ERR_PTR(error)) + /** * PTR_ERR - Extract the error code from an error pointer. * @ptr: An error pointer. @@ -51,6 +54,9 @@ static inline long __must_check PTR_ERR(__force const void *ptr) return (long) ptr; } +/* Read an error pointer from the percpu address space. */ +#define PTR_ERR_PCPU(ptr) (PTR_ERR((const void *)(__force const unsigned long)(ptr))) + /** * IS_ERR - Detect an error pointer. * @ptr: The pointer to check. @@ -61,6 +67,9 @@ static inline bool __must_check IS_ERR(__force const void *ptr) return IS_ERR_VALUE((unsigned long)ptr); } +/* Read an error pointer from the percpu address space. */ +#define IS_ERR_PCPU(ptr) (IS_ERR((const void *)(__force const unsigned long)(ptr))) + /** * IS_ERR_OR_NULL - Detect an error pointer or a null pointer. * @ptr: The pointer to check. diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0ed47d00549b..ecf203f01034 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -21,7 +21,7 @@ #include <linux/netdevice.h> #include <linux/random.h> #include <linux/crc32.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/bitsperlong.h> #ifdef __KERNEL__ @@ -645,7 +645,7 @@ static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb) } /** - * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * eth_skb_pad - Pad buffer to minimum number of octets for Ethernet frame * @skb: Buffer to pad * * An Ethernet frame should have a minimum size of 60 bytes. This function diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 989c94eddb2b..b8b935b52603 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -727,9 +727,16 @@ struct kernel_ethtool_ts_info { * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @cap_rss_ctx_supported: indicates if the driver supports RSS - * contexts. + * contexts via legacy API, drivers implementing @create_rxfh_context + * do not have to set this bit. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_per_ctx_key: device supports setting different RSS key for each + * additional context. Netlink API should report hfunc, key, and input_xfrm + * for every context, not just context 0. + * @cap_rss_rxnfc_adds: device supports nonzero ring_cookie in filters with + * %FLOW_RSS flag; the queue ID from the filter is added to the value from + * the indirection table to determine the delivery queue. * @rxfh_indir_space: max size of RSS indirection tables, if indirection table * size as returned by @get_rxfh_indir_size may change during lifetime * of the device. Leave as 0 if the table size is constant. @@ -951,6 +958,8 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u32 rxfh_per_ctx_key:1; + u32 cap_rss_rxnfc_adds:1; u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index fae0dfb9a9c8..aba91335273a 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -23,8 +23,10 @@ struct phy_device; int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd); void ethnl_cable_test_free(struct phy_device *phydev); void ethnl_cable_test_finished(struct phy_device *phydev); -int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result); -int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm); +int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair, + u8 result, u32 src); +int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair, + u32 cm, u32 src); int ethnl_cable_test_amplitude(struct phy_device *phydev, u8 pair, s16 mV); int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV); int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last, @@ -54,14 +56,14 @@ static inline void ethnl_cable_test_free(struct phy_device *phydev) static inline void ethnl_cable_test_finished(struct phy_device *phydev) { } -static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, - u8 result) +static inline int ethnl_cable_test_result_with_src(struct phy_device *phydev, + u8 pair, u8 result, u32 src) { return -EOPNOTSUPP; } -static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, - u8 pair, u32 cm) +static inline int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, + u8 pair, u32 cm, u32 src) { return -EOPNOTSUPP; } @@ -119,4 +121,19 @@ static inline bool ethtool_dev_mm_supported(struct net_device *dev) } #endif /* IS_ENABLED(CONFIG_ETHTOOL_NETLINK) */ + +static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, + u8 result) +{ + return ethnl_cable_test_result_with_src(phydev, pair, result, + ETHTOOL_A_CABLE_INF_SRC_TDR); +} + +static inline int ethnl_cable_test_fault_length(struct phy_device *phydev, + u8 pair, u32 cm) +{ + return ethnl_cable_test_fault_length_with_src(phydev, pair, cm, + ETHTOOL_A_CABLE_INF_SRC_TDR); +} + #endif /* _LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 3337745d81bd..0c0d00fcd131 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file) * because the file in on the way to be removed and nobody ( but * eventpoll ) has still a reference to this file. */ - if (likely(!file->f_ep)) + if (likely(!READ_ONCE(file->f_ep))) return; /* diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 32cef1144117..64130ae19690 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -46,11 +46,27 @@ enum execmem_type { /** * enum execmem_range_flags - options for executable memory allocations * @EXECMEM_KASAN_SHADOW: allocate kasan shadow + * @EXECMEM_ROX_CACHE: allocations should use ROX cache of huge pages */ enum execmem_range_flags { EXECMEM_KASAN_SHADOW = (1 << 0), + EXECMEM_ROX_CACHE = (1 << 1), }; +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX +/** + * execmem_fill_trapping_insns - set memory to contain instructions that + * will trap + * @ptr: pointer to memory to fill + * @size: size of the range to fill + * @writable: is the memory poited by @ptr is writable or ROX + * + * A hook for architecures to fill execmem ranges with invalid instructions. + * Architectures that use EXECMEM_ROX_CACHE must implement this. + */ +void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); +#endif + /** * struct execmem_range - definition of an address space suitable for code and * related data allocations @@ -123,6 +139,39 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +#ifdef CONFIG_MMU +/** + * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory + * @size: size of the virtual mapping in bytes + * + * Maps virtually contiguous area in the range suitable for EXECMEM_MODULE_DATA. + * + * Return: the area descriptor on success or %NULL on failure. + */ +struct vm_struct *execmem_vmap(size_t size); +#endif + +/** + * execmem_update_copy - copy an update to executable memory + * @dst: destination address to update + * @src: source address containing the data + * @size: how many bytes of memory shold be copied + * + * Copy @size bytes from @src to @dst using text poking if the memory at + * @dst is read-only. + * + * Return: a pointer to @dst or NULL on error + */ +void *execmem_update_copy(void *dst, const void *src, size_t size); + +/** + * execmem_is_rox - check if execmem is read-only + * @type - the execmem type to check + * + * Return: %true if the @type is read-only, %false if it's writable + */ +bool execmem_is_rox(enum execmem_type type); + #if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) void execmem_init(void); #else diff --git a/include/linux/export.h b/include/linux/export.h index 0bbd02fd351d..2633df4d31e6 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -60,14 +60,14 @@ #endif #ifdef DEFAULT_SYMBOL_NAMESPACE -#define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE)) +#define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, DEFAULT_SYMBOL_NAMESPACE) #else #define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, "") #endif #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") #define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "GPL") -#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", __stringify(ns)) -#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", __stringify(ns)) +#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) +#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 893a1d21dc1c..4cc8801e50e3 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -160,6 +160,19 @@ struct fid { #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ +/* + * Filesystems use only lower 8 bits of file_handle type for fid_type. + * name_to_handle_at() uses upper 16 bits of type as user flags to be + * interpreted by open_by_handle_at(). + */ +#define FILEID_USER_FLAGS_MASK 0xffff0000 +#define FILEID_USER_FLAGS(type) ((type) & FILEID_USER_FLAGS_MASK) + +/* Flags supported in encoded handle_type that is exported to user */ +#define FILEID_IS_CONNECTABLE 0x10000 +#define FILEID_IS_DIR 0x20000 +#define FILEID_VALID_USER_FLAGS (FILEID_IS_CONNECTABLE | FILEID_IS_DIR) + /** * struct export_operations - for nfsd to communicate with file systems * @encode_fh: encode a file handle fragment from a dentry @@ -250,19 +263,6 @@ struct export_operations { unsigned long flags; }; -/** - * exportfs_lock_op_is_async() - export op supports async lock operation - * @export_ops: the nfs export operations to check - * - * Returns true if the nfs export_operations structure has - * EXPORT_OP_ASYNC_LOCK in their flags set - */ -static inline bool -exportfs_lock_op_is_async(const struct export_operations *export_ops) -{ - return export_ops->flags & EXPORT_OP_ASYNC_LOCK; -} - extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 01bee2b289c2..c24f8bc01045 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,15 +19,16 @@ #define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ #define F2FS_EXTENSION_LEN 8 /* max size of extension */ -#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ #define COMPRESS_ADDR ((block_t)-2) /* used as compressed data flag */ -#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) -#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BLKSIZE_MASK (F2FS_BLKSIZE - 1) +#define F2FS_BYTES_TO_BLK(bytes) ((unsigned long long)(bytes) >> F2FS_BLKSIZE_BITS) +#define F2FS_BLK_TO_BYTES(blk) ((unsigned long long)(blk) << F2FS_BLKSIZE_BITS) #define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) +#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1)) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 @@ -278,7 +279,7 @@ struct node_footer { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ #define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */ diff --git a/include/linux/falloc.h b/include/linux/falloc.h index f3f0b97b1675..3f49f3df6af5 100644 --- a/include/linux/falloc.h +++ b/include/linux/falloc.h @@ -25,12 +25,18 @@ struct space_resv { #define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv) #define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv) -#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \ - FALLOC_FL_PUNCH_HOLE | \ - FALLOC_FL_COLLAPSE_RANGE | \ - FALLOC_FL_ZERO_RANGE | \ - FALLOC_FL_INSERT_RANGE | \ - FALLOC_FL_UNSHARE_RANGE) +/* + * Mask of all supported fallocate modes. Only one can be set at a time. + * + * In addition to the mode bit, the mode argument can also encode flags. + * FALLOC_FL_KEEP_SIZE is the only supported flag so far. + */ +#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \ + FALLOC_FL_PUNCH_HOLE | \ + FALLOC_FL_COLLAPSE_RANGE | \ + FALLOC_FL_ZERO_RANGE | \ + FALLOC_FL_INSERT_RANGE | \ + FALLOC_FL_UNSHARE_RANGE) /* on ia32 l_start is on a 32-bit boundary */ #if defined(CONFIG_X86_64) diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 4f1c4f603118..89ff45bd6f01 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -36,6 +36,7 @@ #define FANOTIFY_ADMIN_INIT_FLAGS (FANOTIFY_PERM_CLASSES | \ FAN_REPORT_TID | \ FAN_REPORT_PIDFD | \ + FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ FAN_UNLIMITED_MARKS) diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 354413950d34..8c829d28dcf3 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -2,13 +2,17 @@ #ifndef _LINUX_FAULT_INJECT_H #define _LINUX_FAULT_INJECT_H +#include <linux/err.h> +#include <linux/types.h> + +struct dentry; +struct kmem_cache; + #ifdef CONFIG_FAULT_INJECTION -#include <linux/types.h> -#include <linux/debugfs.h> +#include <linux/atomic.h> #include <linux/configfs.h> #include <linux/ratelimit.h> -#include <linux/atomic.h> /* * For explanation of the elements of this struct, see @@ -51,6 +55,28 @@ int setup_fault_attr(struct fault_attr *attr, char *str); bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); +#else /* CONFIG_FAULT_INJECTION */ + +struct fault_attr { +}; + +#define DECLARE_FAULT_ATTR(name) struct fault_attr name = {} + +static inline int setup_fault_attr(struct fault_attr *attr, char *str) +{ + return 0; /* Note: 0 means error for __setup() handlers! */ +} +static inline bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) +{ + return false; +} +static inline bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return false; +} + +#endif /* CONFIG_FAULT_INJECTION */ + #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct dentry *fault_create_debugfs_attr(const char *name, @@ -87,10 +113,6 @@ static inline void fault_config_init(struct fault_config *config, #endif /* CONFIG_FAULT_INJECTION_CONFIGFS */ -#endif /* CONFIG_FAULT_INJECTION */ - -struct kmem_cache; - #ifdef CONFIG_FAIL_PAGE_ALLOC bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); #else diff --git a/include/linux/fb.h b/include/linux/fb.h index db7d97b10964..5ba187e08cf7 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -21,6 +21,7 @@ struct fb_info; struct file; struct i2c_adapter; struct inode; +struct lcd_device; struct module; struct notifier_block; struct page; @@ -480,6 +481,13 @@ struct fb_info { struct mutex bl_curve_mutex; u8 bl_curve[FB_BACKLIGHT_LEVELS]; #endif + + /* + * Assigned LCD device; set before framebuffer + * registration, remove after unregister + */ + struct lcd_device *lcd_dev; + #ifdef CONFIG_FB_DEFERRED_IO struct delayed_work deferred_work; unsigned long npagerefs; @@ -510,6 +518,7 @@ struct fb_info { void *par; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool skip_panic; /* Do not write to the fb after a panic */ }; /* This will go away @@ -601,6 +610,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, /* fbmem.c */ extern int register_framebuffer(struct fb_info *fb_info); extern void unregister_framebuffer(struct fb_info *fb_info); +extern int devm_register_framebuffer(struct device *dev, struct fb_info *fb_info); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); extern void fb_pad_unaligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 idx, u32 height, u32 shift_high, u32 shift_low, u32 mod); @@ -752,6 +762,11 @@ static inline struct backlight_device *fb_bl_device(struct fb_info *info) } #endif +static inline struct lcd_device *fb_lcd_device(struct fb_info *info) +{ + return info->lcd_dev; +} + /* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 2944d4aa413b..c45306a9f007 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -22,7 +22,6 @@ * as this is the granularity returned by copy_fdset(). */ #define NR_OPEN_DEFAULT BITS_PER_LONG -#define NR_OPEN_MAX ~0U struct fdtable { unsigned int max_fds; @@ -93,10 +92,6 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un return files_lookup_fd_raw(files, fd); } -struct file *lookup_fdget_rcu(unsigned int fd); -struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); -struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); - static inline bool close_on_exec(unsigned int fd, const struct files_struct *files) { return test_bit(fd, files_fdtable(files)->close_on_exec); @@ -106,17 +101,17 @@ struct task_struct; void put_files_struct(struct files_struct *fs); int unshare_files(void); -struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy; +struct fd_range { + unsigned int from, to; +}; +struct files_struct *dup_fd(struct files_struct *, struct fd_range *) __latent_entropy; void do_close_on_exec(struct files_struct *); int iterate_fd(struct files_struct *, unsigned, int (*)(const void *, struct file *, unsigned), const void *); extern int close_fd(unsigned int fd); -extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); extern struct file *file_close_fd(unsigned int fd); -extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds, - struct files_struct **new_fdp); extern struct kmem_cache *files_cachep; diff --git a/include/linux/file.h b/include/linux/file.h index 59b146a14dca..302f11355b10 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -11,6 +11,7 @@ #include <linux/posix_types.h> #include <linux/errno.h> #include <linux/cleanup.h> +#include <linux/err.h> struct file; @@ -29,62 +30,59 @@ extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); -static inline void fput_light(struct file *file, int fput_needed) -{ - if (fput_needed) - fput(file); -} - +/* either a reference to struct file + flags + * (cloned vs. borrowed, pos locked), with + * flags stored in lower bits of value, + * or empty (represented by 0). + */ struct fd { - struct file *file; - unsigned int flags; + unsigned long word; }; #define FDPUT_FPUT 1 #define FDPUT_POS_UNLOCK 2 -static inline void fdput(struct fd fd) +#define fd_file(f) ((struct file *)((f).word & ~(FDPUT_FPUT|FDPUT_POS_UNLOCK))) +static inline bool fd_empty(struct fd f) { - if (fd.flags & FDPUT_FPUT) - fput(fd.file); + return unlikely(!f.word); } -extern struct file *fget(unsigned int fd); -extern struct file *fget_raw(unsigned int fd); -extern struct file *fget_task(struct task_struct *task, unsigned int fd); -extern unsigned long __fdget(unsigned int fd); -extern unsigned long __fdget_raw(unsigned int fd); -extern unsigned long __fdget_pos(unsigned int fd); -extern void __f_unlock_pos(struct file *); - -static inline struct fd __to_fd(unsigned long v) +#define EMPTY_FD (struct fd){0} +static inline struct fd BORROWED_FD(struct file *f) { - return (struct fd){(struct file *)(v & ~3),v & 3}; + return (struct fd){(unsigned long)f}; } - -static inline struct fd fdget(unsigned int fd) +static inline struct fd CLONED_FD(struct file *f) { - return __to_fd(__fdget(fd)); + return (struct fd){(unsigned long)f | FDPUT_FPUT}; } -static inline struct fd fdget_raw(unsigned int fd) +static inline void fdput(struct fd fd) { - return __to_fd(__fdget_raw(fd)); + if (fd.word & FDPUT_FPUT) + fput(fd_file(fd)); } -static inline struct fd fdget_pos(int fd) -{ - return __to_fd(__fdget_pos(fd)); -} +extern struct file *fget(unsigned int fd); +extern struct file *fget_raw(unsigned int fd); +extern struct file *fget_task(struct task_struct *task, unsigned int fd); +extern struct file *fget_task_next(struct task_struct *task, unsigned int *fd); +extern void __f_unlock_pos(struct file *); + +struct fd fdget(unsigned int fd); +struct fd fdget_raw(unsigned int fd); +struct fd fdget_pos(unsigned int fd); static inline void fdput_pos(struct fd f) { - if (f.flags & FDPUT_POS_UNLOCK) - __f_unlock_pos(f.file); + if (f.word & FDPUT_POS_UNLOCK) + __f_unlock_pos(fd_file(f)); fdput(f); } DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd) +DEFINE_CLASS(fd_pos, struct fd, fdput_pos(_T), fdget_pos(fd), int fd) extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); @@ -96,6 +94,7 @@ extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) +DEFINE_FREE(fput, struct file *, if (!IS_ERR_OR_NULL(_T)) fput(_T)) /* * take_fd() will take care to set @fd to -EBADF ensuring that diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h new file mode 100644 index 000000000000..9b3a8d9b17ab --- /dev/null +++ b/include/linux/file_ref.h @@ -0,0 +1,177 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _LINUX_FILE_REF_H +#define _LINUX_FILE_REF_H + +#include <linux/atomic.h> +#include <linux/preempt.h> +#include <linux/types.h> + +/* + * file_ref is a reference count implementation specifically for use by + * files. It takes inspiration from rcuref but differs in key aspects + * such as support for SLAB_TYPESAFE_BY_RCU type caches. + * + * FILE_REF_ONEREF FILE_REF_MAXREF + * 0x0000000000000000UL 0x7FFFFFFFFFFFFFFFUL + * <-------------------valid -------------------> + * + * FILE_REF_SATURATED + * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL + * <-----------------------saturation zone----------------------> + * + * FILE_REF_RELEASED FILE_REF_DEAD + * 0xC000000000000000UL 0xE000000000000000UL + * <-------------------dead zone-------------------> + * + * FILE_REF_NOREF + * 0xFFFFFFFFFFFFFFFFUL + */ + +#ifdef CONFIG_64BIT +#define FILE_REF_ONEREF 0x0000000000000000UL +#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFUL +#define FILE_REF_SATURATED 0xA000000000000000UL +#define FILE_REF_RELEASED 0xC000000000000000UL +#define FILE_REF_DEAD 0xE000000000000000UL +#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFUL +#else +#define FILE_REF_ONEREF 0x00000000U +#define FILE_REF_MAXREF 0x7FFFFFFFU +#define FILE_REF_SATURATED 0xA0000000U +#define FILE_REF_RELEASED 0xC0000000U +#define FILE_REF_DEAD 0xE0000000U +#define FILE_REF_NOREF 0xFFFFFFFFU +#endif + +typedef struct { +#ifdef CONFIG_64BIT + atomic64_t refcnt; +#else + atomic_t refcnt; +#endif +} file_ref_t; + +/** + * file_ref_init - Initialize a file reference count + * @ref: Pointer to the reference count + * @cnt: The initial reference count typically '1' + */ +static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) +{ + atomic_long_set(&ref->refcnt, cnt - 1); +} + +bool __file_ref_put(file_ref_t *ref, unsigned long cnt); + +/** + * file_ref_get - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF. + * + * Provides full memory ordering. + * + * Return: False if the attempt to acquire a reference failed. This happens + * when the last reference has been put already. True if a reference + * was successfully acquired + */ +static __always_inline __must_check bool file_ref_get(file_ref_t *ref) +{ + /* + * Unconditionally increase the reference count with full + * ordering. The saturation and dead zones provide enough + * tolerance for this. + * + * If this indicates negative the file in question the fail can + * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU. + * Hence, unconditionally altering the file reference count to + * e.g., reset the file reference count back to the middle of + * the deadzone risk end up marking someone else's file as dead + * behind their back. + * + * It would be possible to do a careful: + * + * cnt = atomic_long_inc_return(); + * if (likely(cnt >= 0)) + * return true; + * + * and then something like: + * + * if (cnt >= FILE_REF_RELEASE) + * atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD), + * + * to set the value back to the middle of the deadzone. But it's + * practically impossible to go from FILE_REF_DEAD to + * FILE_REF_ONEREF. It would need 2305843009213693952/2^61 + * file_ref_get()s to resurrect such a dead file. + */ + return !atomic_long_add_negative(1, &ref->refcnt); +} + +/** + * file_ref_inc - Acquire one reference on a file + * @ref: Pointer to the reference count + * + * Acquire an additional reference on a file. Warns if the caller didn't + * already hold a reference. + */ +static __always_inline void file_ref_inc(file_ref_t *ref) +{ + long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt); + WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference"); +} + +/** + * file_ref_put -- Release a file reference + * @ref: Pointer to the reference count + * + * Provides release memory ordering, such that prior loads and stores + * are done before, and provides an acquire ordering on success such + * that free() must come after. + * + * Return: True if this was the last reference with no future references + * possible. This signals the caller that it can safely release + * the object which is protected by the reference counter. + * False if there are still active references or the put() raced + * with a concurrent get()/put() pair. Caller is not allowed to + * release the protected object. + */ +static __always_inline __must_check bool file_ref_put(file_ref_t *ref) +{ + long cnt; + + /* + * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put() + * calls don't risk UAFs when a file is recyclyed, it is still + * vulnerable to UAFs caused by freeing the whole slab page once + * it becomes unused. Prevent file_ref_put() from being + * preempted protects against this. + */ + guard(preempt)(); + /* + * Unconditionally decrease the reference count. The saturation + * and dead zones provide enough tolerance for this. If this + * fails then we need to handle the last reference drop and + * cases inside the saturation and dead zones. + */ + cnt = atomic_long_dec_return(&ref->refcnt); + if (cnt >= 0) + return false; + return __file_ref_put(ref, cnt); +} + +/** + * file_ref_read - Read the number of file references + * @ref: Pointer to the reference count + * + * Return: The number of held references (0 ... N) + */ +static inline unsigned long file_ref_read(file_ref_t *ref) +{ + unsigned long c = atomic_long_read(&ref->refcnt); + + /* Return 0 if within the DEAD zone. */ + return c >= FILE_REF_RELEASED ? 0 : c + 1; +} + +#endif diff --git a/include/linux/filelock.h b/include/linux/filelock.h index daee999d05f3..c412ded9171e 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl) wake_up(&fl->c.flc_wait); } +static inline bool locks_can_async_lock(const struct file_operations *fops) +{ + return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK; +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); @@ -420,28 +425,38 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) #ifdef CONFIG_FILE_LOCKING static inline int break_lease(struct inode *inode, unsigned int mode) { + struct file_lock_context *flctx; + /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ + flctx = READ_ONCE(inode->i_flctx); + if (!flctx) + return 0; smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_LEASE); return 0; } static inline int break_deleg(struct inode *inode, unsigned int mode) { + struct file_lock_context *flctx; + /* * Since this check is lockless, we must ensure that any refcounts * taken are done before checking i_flctx->flc_lease. Otherwise, we * could end up racing with tasks trying to set a new lease on this * file. */ + flctx = READ_ONCE(inode->i_flctx); + if (!flctx) + return 0; smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) + if (!list_empty_careful(&flctx->flc_lease)) return __break_lease(inode, mode, FL_DELEG); return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index b6672ff61407..0477254bc2d3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -437,6 +437,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) +/* Unconditional jumps, gotol pc + imm32 */ + +#define BPF_JMP32_A(IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* Relative call */ #define BPF_CALL_REL(TGT) \ @@ -1109,9 +1119,10 @@ bool bpf_jit_supports_exceptions(void); bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); +bool bpf_jit_supports_private_stack(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); -bool bpf_helper_changes_pkt_data(void *func); +bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); static inline bool bpf_dump_raw_ok(const struct cred *cred) { @@ -1616,7 +1627,7 @@ extern struct static_key_false bpf_sk_lookup_enabled; _all_pass || _selected_sk ? SK_PASS : SK_DROP; \ }) -static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, +static inline bool bpf_sk_lookup_run_v4(const struct net *net, int protocol, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 dport, const int ifindex, struct sock **psk) @@ -1653,7 +1664,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, } #if IS_ENABLED(CONFIG_IPV6) -static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, +static inline bool bpf_sk_lookup_run_v6(const struct net *net, int protocol, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, diff --git a/include/linux/find.h b/include/linux/find.h index 5dfca4225fef..68685714bc18 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1, * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) * Returns the bit number of the N'th set bit. * If no such, returns >= @size. */ -static inline +static __always_inline unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) { if (n >= size) @@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long * * Returns the bit number of the N'th set bit. * If no such, returns @size. */ -static inline +static __always_inline unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long n) { @@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1, * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size) @@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1, * Returns the bit number for the first set bit * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, @@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -static inline +static __always_inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) * * Returns the bit number of the last set bit, or size. */ -static inline +static __always_inline unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size) * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_and_bit_wrap(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) @@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, * Returns the bit number for the next set bit, or first set bit up to @offset * If no bits are set, returns @size. */ -static inline +static __always_inline unsigned long find_next_bit_wrap(const unsigned long *addr, unsigned long size, unsigned long offset) { @@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, * Helper for for_each_set_bit_wrap(). Make sure you're doing right thing * before using it alone. */ -static inline +static __always_inline unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size, unsigned long start, unsigned long n) { @@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump, #if defined(__LITTLE_ENDIAN) -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_zero_bit(addr, size, offset); } -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) +static __always_inline +unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { return find_next_bit(addr, size, offset); } -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) +static __always_inline +unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { return find_first_zero_bit(addr, size); } @@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr, #elif defined(__BIG_ENDIAN) #ifndef find_next_zero_bit_le -static inline +static __always_inline unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { @@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned #endif #ifndef find_first_zero_bit_le -static inline +static __always_inline unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) { if (small_const_nbits(size)) { @@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size) #endif #ifndef find_next_bit_le -static inline +static __always_inline unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 1cca14cf5652..b632eec3ab52 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -134,6 +134,8 @@ struct fw_card { __be32 topology_map[(CSR_TOPOLOGY_MAP_END - CSR_TOPOLOGY_MAP) / 4]; __be32 maint_utility_register; + + struct workqueue_struct *isoc_wq; }; static inline struct fw_card *fw_card_get(struct fw_card *card) @@ -509,6 +511,7 @@ union fw_iso_callback { struct fw_iso_context { struct fw_card *card; + struct work_struct work; int type; int channel; int speed; @@ -528,6 +531,25 @@ int fw_iso_context_queue(struct fw_iso_context *ctx, unsigned long payload); void fw_iso_context_queue_flush(struct fw_iso_context *ctx); int fw_iso_context_flush_completions(struct fw_iso_context *ctx); + +/** + * fw_iso_context_schedule_flush_completions() - schedule work item to process isochronous context. + * @ctx: the isochronous context + * + * Schedule a work item on workqueue to process the isochronous context. The registered callback + * function is called by the worker when a queued packet buffer with the interrupt flag is + * completed, either after transmission in the IT context or after being filled in the IR context. + * The callback function is also called when the header buffer in the context becomes full, If it + * is required to process the context in the current context, fw_iso_context_flush_completions() is + * available instead. + * + * Context: Any context. + */ +static inline void fw_iso_context_schedule_flush_completions(struct fw_iso_context *ctx) +{ + queue_work(ctx->card->isoc_wq, &ctx->work); +} + int fw_iso_context_start(struct fw_iso_context *ctx, int cycle, int sync, int tags); int fw_iso_context_stop(struct fw_iso_context *ctx); diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h new file mode 100644 index 000000000000..9b85a3f028d1 --- /dev/null +++ b/include/linux/firmware/imx/sm.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2024 NXP + */ + +#ifndef _SCMI_IMX_H +#define _SCMI_IMX_H + +#include <linux/bitfield.h> +#include <linux/errno.h> +#include <linux/types.h> + +#define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ +#define SCMI_IMX_CTRL_MQS1_SETTINGS 1 /* AON MQS settings */ +#define SCMI_IMX_CTRL_SAI1_MCLK 2 /* AON SAI1 MCLK */ +#define SCMI_IMX_CTRL_SAI3_MCLK 3 /* WAKE SAI3 MCLK */ +#define SCMI_IMX_CTRL_SAI4_MCLK 4 /* WAKE SAI4 MCLK */ +#define SCMI_IMX_CTRL_SAI5_MCLK 5 /* WAKE SAI5 MCLK */ + +int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); +int scmi_imx_misc_ctrl_set(u32 id, u32 val); + +#endif diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 9f14976399ab..4621aec0328c 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -85,6 +85,8 @@ int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); bool qcom_scm_restore_sec_cfg_available(void); int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +int qcom_scm_set_gpu_smmu_aperture(unsigned int context_bank); +bool qcom_scm_set_gpu_smmu_aperture_is_available(void); int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index d7d07afc0532..6d4dbc196b93 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,7 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx - * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. + * Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. * * Michal Simek <michal.simek@amd.com> * Davorin Mista <davorin.mista@aggios.com> @@ -32,6 +32,19 @@ /* SMC SIP service Call Function Identifier Prefix */ #define PM_SIP_SVC 0xC2000000 +/* SMC function ID to get SiP SVC version */ +#define GET_SIP_SVC_VERSION (0x8200ff03U) + +/* SiP Service Calls version numbers */ +#define SIP_SVC_VERSION_MAJOR (0U) +#define SIP_SVC_VERSION_MINOR (2U) + +#define SIP_SVC_PASSTHROUGH_VERSION ((SIP_SVC_VERSION_MAJOR << 16) | \ + SIP_SVC_VERSION_MINOR) + +/* Fixed ID for FW specific APIs */ +#define PASS_THROUGH_FW_CMD_ID GENMASK(11, 0) + /* PM API versions */ #define PM_API_VERSION_1 1 #define PM_API_VERSION_2 2 @@ -51,6 +64,7 @@ #define API_ID_MASK GENMASK(7, 0) #define MODULE_ID_MASK GENMASK(11, 8) +#define PLM_MODULE_ID_MASK GENMASK(15, 8) /* Firmware feature check version mask */ #define FIRMWARE_VERSION_MASK 0xFFFFU @@ -62,7 +76,13 @@ #define GET_CALLBACK_DATA 0xa01 /* Number of 32bits values in payload */ -#define PAYLOAD_ARG_CNT 4U +#define PAYLOAD_ARG_CNT 7U + +/* Number of 64bits arguments for SMC call */ +#define SMC_ARG_CNT_64 8U + +/* Number of 32bits arguments for SMC call */ +#define SMC_ARG_CNT_32 13U /* Number of arguments for a callback */ #define CB_ARG_CNT 4 @@ -130,6 +150,7 @@ enum pm_module_id { PM_MODULE_ID = 0x0, + XPM_MODULE_ID = 0x2, XSEM_MODULE_ID = 0x3, TF_A_MODULE_ID = 0xa, }; @@ -218,9 +239,13 @@ enum pm_ioctl_id { /* Runtime feature configuration */ IOCTL_SET_FEATURE_CONFIG = 26, IOCTL_GET_FEATURE_CONFIG = 27, + /* IOCTL for Secure Read/Write Interface */ + IOCTL_READ_REG = 28, /* Dynamic SD/GEM configuration */ IOCTL_SET_SD_CONFIG = 30, IOCTL_SET_GEM_CONFIG = 31, + /* IOCTL to get default/current QoS */ + IOCTL_GET_QOS = 34, }; enum pm_query_id { @@ -238,6 +263,7 @@ enum pm_query_id { PM_QID_PINCTRL_GET_PIN_GROUPS = 11, PM_QID_CLOCK_GET_NUM_CLOCKS = 12, PM_QID_CLOCK_GET_MAX_DIVISOR = 13, + PM_QID_PINCTRL_GET_ATTRIBUTES = 15, }; enum rpu_oper_mode { @@ -533,6 +559,7 @@ struct zynqmp_pm_query_data { }; int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); +int zynqmp_pm_invoke_fw_fn(u32 pm_api_id, u32 *ret_payload, u32 num_args, ...); #if IS_REACHABLE(CONFIG_ZYNQMP_FIRMWARE) int zynqmp_pm_get_api_version(u32 *version); @@ -553,9 +580,9 @@ int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data); int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value); int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type); int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select); -int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag); -int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status); +int zynqmp_pm_reset_get_status(const u32 reset, u32 *status); unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode); int zynqmp_pm_bootmode_write(u32 ps_mode); int zynqmp_pm_init_finalize(void); @@ -698,14 +725,13 @@ static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select) return -ENODEV; } -static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset, +static inline int zynqmp_pm_reset_assert(const u32 reset, const enum zynqmp_pm_reset_action assert_flag) { return -ENODEV; } -static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, - u32 *status) +static inline int zynqmp_pm_reset_get_status(const u32 reset, u32 *status) { return -ENODEV; } diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h new file mode 100644 index 000000000000..3abe614ef5f0 --- /dev/null +++ b/include/linux/folio_queue.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Queue of folios definitions + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * See: + * + * Documentation/core-api/folio_queue.rst + * + * for a description of the API. + */ + +#ifndef _LINUX_FOLIO_QUEUE_H +#define _LINUX_FOLIO_QUEUE_H + +#include <linux/pagevec.h> + +/* + * Segment in a queue of running buffers. Each segment can hold a number of + * folios and a portion of the queue can be referenced with the ITER_FOLIOQ + * iterator. The possibility exists of inserting non-folio elements into the + * queue (such as gaps). + * + * Explicit prev and next pointers are used instead of a list_head to make it + * easier to add segments to tail and remove them from the head without the + * need for a lock. + */ +struct folio_queue { + struct folio_batch vec; /* Folios in the queue segment */ + u8 orders[PAGEVEC_SIZE]; /* Order of each folio */ + struct folio_queue *next; /* Next queue segment or NULL */ + struct folio_queue *prev; /* Previous queue segment of NULL */ + unsigned long marks; /* 1-bit mark per folio */ + unsigned long marks2; /* Second 1-bit mark per folio */ + unsigned long marks3; /* Third 1-bit mark per folio */ +#if PAGEVEC_SIZE > BITS_PER_LONG +#error marks is not big enough +#endif +}; + +/** + * folioq_init - Initialise a folio queue segment + * @folioq: The segment to initialise + * + * Initialise a folio queue segment. Note that the folio pointers are + * left uninitialised. + */ +static inline void folioq_init(struct folio_queue *folioq) +{ + folio_batch_init(&folioq->vec); + folioq->next = NULL; + folioq->prev = NULL; + folioq->marks = 0; + folioq->marks2 = 0; + folioq->marks3 = 0; +} + +/** + * folioq_nr_slots: Query the capacity of a folio queue segment + * @folioq: The segment to query + * + * Query the number of folios that a particular folio queue segment might hold. + * [!] NOTE: This must not be assumed to be the same for every segment! + */ +static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq) +{ + return PAGEVEC_SIZE; +} + +/** + * folioq_count: Query the occupancy of a folio queue segment + * @folioq: The segment to query + * + * Query the number of folios that have been added to a folio queue segment. + * Note that this is not decreased as folios are removed from a segment. + */ +static inline unsigned int folioq_count(struct folio_queue *folioq) +{ + return folio_batch_count(&folioq->vec); +} + +/** + * folioq_full: Query if a folio queue segment is full + * @folioq: The segment to query + * + * Query if a folio queue segment is fully occupied. Note that this does not + * change if folios are removed from a segment. + */ +static inline bool folioq_full(struct folio_queue *folioq) +{ + //return !folio_batch_space(&folioq->vec); + return folioq_count(folioq) >= folioq_nr_slots(folioq); +} + +/** + * folioq_is_marked: Check first folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the first mark is set for the folio in the specified slot in a + * folio queue segment. + */ +static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks); +} + +/** + * folioq_mark: Set the first mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the first mark for the folio in the specified slot in a folio queue + * segment. + */ +static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks); +} + +/** + * folioq_unmark: Clear the first mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the first mark for the folio in the specified slot in a folio queue + * segment. + */ +static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks); +} + +/** + * folioq_is_marked2: Check second folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the second mark is set for the folio in the specified slot in a + * folio queue segment. + */ +static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks2); +} + +/** + * folioq_mark2: Set the second mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the second mark for the folio in the specified slot in a folio queue + * segment. + */ +static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks2); +} + +/** + * folioq_unmark2: Clear the second mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the second mark for the folio in the specified slot in a folio queue + * segment. + */ +static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks2); +} + +/** + * folioq_is_marked3: Check third folio mark in a folio queue segment + * @folioq: The segment to query + * @slot: The slot number of the folio to query + * + * Determine if the third mark is set for the folio in the specified slot in a + * folio queue segment. + */ +static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) +{ + return test_bit(slot, &folioq->marks3); +} + +/** + * folioq_mark3: Set the third mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Set the third mark for the folio in the specified slot in a folio queue + * segment. + */ +static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) +{ + set_bit(slot, &folioq->marks3); +} + +/** + * folioq_unmark3: Clear the third mark on a folio in a folio queue segment + * @folioq: The segment to modify + * @slot: The slot number of the folio to modify + * + * Clear the third mark for the folio in the specified slot in a folio queue + * segment. + */ +static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) +{ + clear_bit(slot, &folioq->marks3); +} + +static inline unsigned int __folio_order(struct folio *folio) +{ + if (!folio_test_large(folio)) + return 0; + return folio->_flags_1 & 0xff; +} + +/** + * folioq_append: Add a folio to a folio queue segment + * @folioq: The segment to add to + * @folio: The folio to add + * + * Add a folio to the tail of the sequence in a folio queue segment, increasing + * the occupancy count and returning the slot number for the folio just added. + * The folio size is extracted and stored in the queue and the marks are left + * unmodified. + * + * Note that it's left up to the caller to check that the segment capacity will + * not be exceeded and to extend the queue. + */ +static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + return slot; +} + +/** + * folioq_append_mark: Add a folio to a folio queue segment + * @folioq: The segment to add to + * @folio: The folio to add + * + * Add a folio to the tail of the sequence in a folio queue segment, increasing + * the occupancy count and returning the slot number for the folio just added. + * The folio size is extracted and stored in the queue, the first mark is set + * and and the second and third marks are left unmodified. + * + * Note that it's left up to the caller to check that the segment capacity will + * not be exceeded and to extend the queue. + */ +static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio) +{ + unsigned int slot = folioq->vec.nr++; + + folioq->vec.folios[slot] = folio; + folioq->orders[slot] = __folio_order(folio); + folioq_mark(folioq, slot); + return slot; +} + +/** + * folioq_folio: Get a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the folio in the specified slot from a folio queue segment. Note + * that no bounds check is made and if the slot hasn't been added into yet, the + * pointer will be undefined. If the slot has been cleared, NULL will be + * returned. + */ +static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->vec.folios[slot]; +} + +/** + * folioq_folio_order: Get the order of a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the order of the folio in the specified slot from a folio queue + * segment. Note that no bounds check is made and if the slot hasn't been + * added into yet, the order returned will be 0. + */ +static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot) +{ + return folioq->orders[slot]; +} + +/** + * folioq_folio_size: Get the size of a folio from a folio queue segment + * @folioq: The segment to access + * @slot: The folio slot to access + * + * Retrieve the size of the folio in the specified slot from a folio queue + * segment. Note that no bounds check is made and if the slot hasn't been + * added into yet, the size returned will be PAGE_SIZE. + */ +static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot) +{ + return PAGE_SIZE << folioq_folio_order(folioq, slot); +} + +/** + * folioq_clear: Clear a folio from a folio queue segment + * @folioq: The segment to clear + * @slot: The folio slot to clear + * + * Clear a folio from a sequence in a folio queue segment and clear its marks. + * The occupancy count is left unchanged. + */ +static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) +{ + folioq->vec.folios[slot] = NULL; + folioq_unmark(folioq, slot); + folioq_unmark2(folioq, slot); + folioq_unmark3(folioq, slot); +} + +#endif /* _LINUX_FOLIO_QUEUE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ca11e241a24..7e29433c5ecc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -45,6 +45,8 @@ #include <linux/slab.h> #include <linux/maple_tree.h> #include <linux/rw_hint.h> +#include <linux/file_ref.h> +#include <linux/unicode.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -146,8 +148,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* Expect random access pattern */ #define FMODE_RANDOM ((__force fmode_t)(1 << 12)) -/* File is huge (eg. /dev/mem): treat loff_t as unsigned */ -#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) +/* FMODE_* bit 13 */ /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)(1 << 14)) @@ -409,10 +410,10 @@ struct address_space_operations { int (*write_begin)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); int (*write_end)(struct file *, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata); + struct folio *folio, void *fsdata); /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); @@ -624,6 +625,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_NOFOLLOW 0x0004 #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 +#define IOP_MGTIME 0x0020 /* * Keep mostly read-only and often accessed (especially for @@ -683,7 +685,8 @@ struct inode { #endif /* Misc */ - unsigned long i_state; + u32 i_state; + /* 32-bit hole */ struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ @@ -746,6 +749,21 @@ struct inode { void *i_private; /* fs or device private pointer */ } __randomize_layout; +/* + * Get bit address from inode->i_state to use with wait_var_event() + * infrastructre. + */ +#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit)) + +struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, + struct inode *inode, u32 bit); + +static inline void inode_wake_up_bit(struct inode *inode, u32 bit) +{ + /* Caller is responsible for correct memory barriers. */ + wake_up_var(inode_state_wait_address(inode, bit)); +} + struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode); static inline unsigned int i_blocksize(const struct inode *node) @@ -948,6 +966,7 @@ static inline unsigned imajor(const struct inode *inode) } struct fown_struct { + struct file *file; /* backpointer for security modules */ rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ @@ -987,52 +1006,69 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -/* - * f_{lock,count,pos_lock} members can be highly contended and share - * the same cacheline. f_{lock,mode} are very frequently used together - * and so share the same cacheline as well. The read-mostly - * f_{path,inode,op} are kept on a separate cacheline. +/** + * struct file - Represents a file + * @f_ref: reference count + * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. + * @f_mode: FMODE_* flags often used in hotpaths + * @f_op: file operations + * @f_mapping: Contents of a cacheable, mappable object. + * @private_data: filesystem or driver specific data + * @f_inode: cached inode + * @f_flags: file flags + * @f_iocb_flags: iocb flags + * @f_cred: stashed credentials of creator/opener + * @f_path: path of the file + * @f_pos_lock: lock protecting file position + * @f_pipe: specific to pipes + * @f_pos: file position + * @f_security: LSM security context of this file + * @f_owner: file owner + * @f_wb_err: writeback error + * @f_sb_err: per sb writeback errors + * @f_ep: link of all epoll hooks for this file + * @f_task_work: task work entry point + * @f_llist: work queue entrypoint + * @f_ra: file's readahead state + * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) */ struct file { + file_ref_t f_ref; + spinlock_t f_lock; + fmode_t f_mode; + const struct file_operations *f_op; + struct address_space *f_mapping; + void *private_data; + struct inode *f_inode; + unsigned int f_flags; + unsigned int f_iocb_flags; + const struct cred *f_cred; + /* --- cacheline 1 boundary (64 bytes) --- */ + struct path f_path; union { - /* fput() uses task work when closing and freeing file (default). */ - struct callback_head f_task_work; - /* fput() must use workqueue (most kernel threads). */ - struct llist_node f_llist; - unsigned int f_iocb_flags; + /* regular files (with FMODE_ATOMIC_POS) and directories */ + struct mutex f_pos_lock; + /* pipes */ + u64 f_pipe; }; - - /* - * Protects f_ep, f_flags. - * Must not be taken from IRQ context. - */ - spinlock_t f_lock; - fmode_t f_mode; - atomic_long_t f_count; - struct mutex f_pos_lock; - loff_t f_pos; - unsigned int f_flags; - struct fown_struct f_owner; - const struct cred *f_cred; - struct file_ra_state f_ra; - struct path f_path; - struct inode *f_inode; /* cached value */ - const struct file_operations *f_op; - - u64 f_version; + loff_t f_pos; #ifdef CONFIG_SECURITY - void *f_security; + void *f_security; #endif - /* needed for tty driver, and maybe others */ - void *private_data; - + /* --- cacheline 2 boundary (128 bytes) --- */ + struct fown_struct *f_owner; + errseq_t f_wb_err; + errseq_t f_sb_err; #ifdef CONFIG_EPOLL - /* Used by fs/eventpoll.c to link all the hooks to this file */ - struct hlist_head *f_ep; -#endif /* #ifdef CONFIG_EPOLL */ - struct address_space *f_mapping; - errseq_t f_wb_err; - errseq_t f_sb_err; /* for syncfs */ + struct hlist_head *f_ep; +#endif + union { + struct callback_head f_task_work; + struct llist_node f_llist; + struct file_ra_state f_ra; + freeptr_t f_freeptr; + }; + /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ @@ -1045,15 +1081,14 @@ struct file_handle { static inline struct file *get_file(struct file *f) { - long prior = atomic_long_fetch_inc_relaxed(&f->f_count); - WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n"); + file_ref_inc(&f->f_ref); return f; } struct file *get_file_rcu(struct file __rcu **f); struct file *get_file_active(struct file **f); -#define file_count(x) atomic_long_read(&(x)->f_count) +#define file_count(f) file_ref_read(&(f)->f_ref) #define MAX_NON_LFS ((1UL<<31) - 1) @@ -1077,6 +1112,12 @@ struct file_lease; #define OFFT_OFFSET_MAX type_max(off_t) #endif +int file_f_owner_allocate(struct file *file); +static inline struct fown_struct *file_f_owner(const struct file *file) +{ + return READ_ONCE(file->f_owner); +} + extern void send_sigio(struct fown_struct *fown, int fd, int band); static inline struct inode *file_inode(const struct file *f) @@ -1125,7 +1166,7 @@ extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); -extern int send_sigurg(struct fown_struct *fown); +extern int send_sigurg(struct file *file); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where @@ -1190,6 +1231,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ +#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ /* Possible states of 'frozen' field */ enum { @@ -1268,7 +1310,7 @@ struct super_block { time64_t s_time_min; time64_t s_time_max; #ifdef CONFIG_FSNOTIFY - __u32 s_fsnotify_mask; + u32 s_fsnotify_mask; struct fsnotify_sb_info *s_fsnotify_info; #endif @@ -1544,6 +1586,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, struct timespec64 current_time(struct inode *inode); struct timespec64 inode_set_ctime_current(struct inode *inode); +struct timespec64 inode_set_ctime_deleg(struct inode *inode, + struct timespec64 update); static inline time64_t inode_get_atime_sec(const struct inode *inode) { @@ -1613,6 +1657,17 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, return inode_set_mtime_to_ts(inode, ts); } +/* + * Multigrain timestamps + * + * Conditionally use fine-grained ctime and mtime timestamps when there + * are users actively observing them via getattr. The primary use-case + * for this is NFS clients that use the ctime to distinguish between + * different states of the file, and that are often fooled by multiple + * operations that occur in the same coarse-grained timer tick. + */ +#define I_CTIME_QUERIED ((u32)BIT(31)) + static inline time64_t inode_get_ctime_sec(const struct inode *inode) { return inode->i_ctime_sec; @@ -1620,7 +1675,7 @@ static inline time64_t inode_get_ctime_sec(const struct inode *inode) static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->i_ctime_nsec; + return inode->i_ctime_nsec & ~I_CTIME_QUERIED; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) @@ -1631,13 +1686,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode) return ts; } -static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, - struct timespec64 ts) -{ - inode->i_ctime_sec = ts.tv_sec; - inode->i_ctime_nsec = ts.tv_nsec; - return ts; -} +struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts); /** * inode_set_ctime - set the ctime in the inode @@ -1684,7 +1733,7 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level) #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) #define __sb_writers_release(sb, lev) \ - percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) + percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_) /** * __sb_write_started - check if sb freeze level is held @@ -2074,6 +2123,10 @@ struct file_operations { #define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) /* Contains huge pages */ #define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) +/* Treat loff_t as unsigned (e.g., /dev/mem) */ +#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) +/* Supports asynchronous lock callbacks */ +#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -2373,8 +2426,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_REFERENCED Marks the inode as recently references on the LRU list. * - * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit(). - * * I_WB_SWITCH Cgroup bdi_writeback switching in progress. Used to * synchronize competing switching instances and to tell * wb stat updates to grab the i_pages lock. See @@ -2397,30 +2448,32 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * i_count. * * Q: What is the difference between I_WILL_FREE and I_FREEING? + * + * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait + * upon. There's one free address left. */ -#define I_DIRTY_SYNC (1 << 0) -#define I_DIRTY_DATASYNC (1 << 1) -#define I_DIRTY_PAGES (1 << 2) -#define __I_NEW 3 +#define __I_NEW 0 #define I_NEW (1 << __I_NEW) -#define I_WILL_FREE (1 << 4) -#define I_FREEING (1 << 5) -#define I_CLEAR (1 << 6) -#define __I_SYNC 7 +#define __I_SYNC 1 #define I_SYNC (1 << __I_SYNC) -#define I_REFERENCED (1 << 8) -#define __I_DIO_WAKEUP 9 -#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) +#define __I_LRU_ISOLATING 2 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) + +#define I_DIRTY_SYNC (1 << 3) +#define I_DIRTY_DATASYNC (1 << 4) +#define I_DIRTY_PAGES (1 << 5) +#define I_WILL_FREE (1 << 6) +#define I_FREEING (1 << 7) +#define I_CLEAR (1 << 8) +#define I_REFERENCED (1 << 9) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define I_WB_SWITCH (1 << 13) -#define I_OVL_INUSE (1 << 14) -#define I_CREATING (1 << 15) -#define I_DONTCACHE (1 << 16) -#define I_SYNC_QUEUED (1 << 17) -#define I_PINNING_NETFS_WB (1 << 18) -#define __I_LRU_ISOLATING 19 -#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) +#define I_WB_SWITCH (1 << 12) +#define I_OVL_INUSE (1 << 13) +#define I_CREATING (1 << 14) +#define I_DONTCACHE (1 << 15) +#define I_SYNC_QUEUED (1 << 16) +#define I_PINNING_NETFS_WB (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) @@ -2500,6 +2553,7 @@ struct file_system_type { #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ +#define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2523,6 +2577,17 @@ struct file_system_type { #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) +/** + * is_mgtime: is this inode using multigrain timestamps + * @inode: inode to test for multigrain timestamps + * + * Return true if the inode uses multigrain timestamps, false otherwise. + */ +static inline bool is_mgtime(const struct inode *inode) +{ + return inode->i_opflags & IOP_MGTIME; +} + extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2554,10 +2619,17 @@ struct super_block *sget(struct file_system_type *type, struct super_block *sget_dev(struct fs_context *fc, dev_t dev); /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ -#define fops_get(fops) \ - (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) -#define fops_put(fops) \ - do { if (fops) module_put((fops)->owner); } while(0) +#define fops_get(fops) ({ \ + const struct file_operations *_fops = (fops); \ + (((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL)); \ +}) + +#define fops_put(fops) ({ \ + const struct file_operations *_fops = (fops); \ + if (_fops) \ + module_put((_fops)->owner); \ +}) + /* * This one is to be used *ONLY* from ->open() instances. * fops must be non-NULL, pinned down *and* module dependencies @@ -2717,6 +2789,16 @@ extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); +extern struct filename *__getname_maybe_null(const char __user *); +static inline struct filename *getname_maybe_null(const char __user *name, int flags) +{ + if (!(flags & AT_EMPTY_PATH)) + return getname(name); + + if (!name) + return NULL; + return __getname_maybe_null(name); +} extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, @@ -3033,7 +3115,12 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int whence); extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence); -extern int inode_init_always(struct super_block *, struct inode *); +extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t); +static inline int inode_init_always(struct super_block *sb, struct inode *inode) +{ + return inode_init_always_gfp(sb, inode, GFP_NOFS); +} + extern void inode_init_once(struct inode *); extern void address_space_init_once(struct address_space *mapping); extern struct inode * igrab(struct inode *); @@ -3100,7 +3187,14 @@ static inline bool is_zero_ino(ino_t ino) return (u32)ino == 0; } -extern void __iget(struct inode * inode); +/* + * inode->i_lock must be held + */ +static inline void __iget(struct inode *inode) +{ + atomic_inc(&inode->i_count); +} + extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); @@ -3178,11 +3272,12 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); -#define no_llseek NULL extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence, + u64 *cookie); extern loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size); extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t); @@ -3220,7 +3315,9 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, } #endif +bool inode_dio_finished(const struct inode *inode); void inode_dio_wait(struct inode *inode); +void inode_dio_wait_interruptible(struct inode *inode); /** * inode_dio_begin - signal start of a direct I/O requests @@ -3244,7 +3341,7 @@ static inline void inode_dio_begin(struct inode *inode) static inline void inode_dio_end(struct inode *inode) { if (atomic_dec_and_test(&inode->i_dio_count)) - wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); + wake_up_var(&inode->i_dio_count); } extern void inode_set_flags(struct inode *inode, unsigned int flags, @@ -3262,6 +3359,7 @@ extern void page_put_link(void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); +void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, @@ -3337,7 +3435,7 @@ extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata); + struct folio **foliop, void **fsdata); extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); @@ -3392,6 +3490,54 @@ extern int generic_ci_match(const struct inode *parent, const struct qstr *folded_name, const u8 *de_name, u32 de_name_len); +#if IS_ENABLED(CONFIG_UNICODE) +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); + +/** + * generic_ci_validate_strict_name - Check if a given name is suitable + * for a directory + * + * This functions checks if the proposed filename is valid for the + * parent directory. That means that only valid UTF-8 filenames will be + * accepted for casefold directories from filesystems created with the + * strict encoding flag. That also means that any name will be + * accepted for directories that doesn't have casefold enabled, or + * aren't being strict with the encoding. + * + * @dir: inode of the directory where the new file will be created + * @name: name of the new file + * + * Return: + * * True: if the filename is suitable for this directory. It can be + * true if a given name is not suitable for a strict encoding + * directory, but the directory being used isn't strict + * * False if the filename isn't suitable for this directory. This only + * happens when a directory is casefolded and the filesystem is strict + * about its encoding. + */ +static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +{ + if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb)) + return true; + + /* + * A casefold dir must have a encoding set, unless the filesystem + * is corrupted + */ + if (WARN_ON_ONCE(!dir->i_sb->s_encoding)) + return true; + + return !utf8_validate(dir->i_sb->s_encoding, name); +} +#else +static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name) +{ + return true; +} +#endif + static inline bool sb_has_encoding(const struct super_block *sb) { #if IS_ENABLED(CONFIG_UNICODE) @@ -3461,7 +3607,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - kiocb_flags |= IOCB_NOIO; } if (flags & RWF_ATOMIC) { if (rw_type != WRITE) @@ -3663,6 +3808,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path) return !c; } -bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos); +int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter); #endif /* _LINUX_FS_H */ diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index c13e99cbbf81..4b4bfef6f053 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -160,6 +160,12 @@ extern int get_tree_keyed(struct fs_context *fc, int setup_bdev_super(struct super_block *sb, int sb_flags, struct fs_context *fc); + +#define GET_TREE_BDEV_QUIET_LOOKUP 0x0001 +int get_tree_bdev_flags(struct fs_context *fc, + int (*fill_super)(struct super_block *sb, + struct fs_context *fc), unsigned int flags); + extern int get_tree_bdev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 6cf713a7e6c6..3cef566088fc 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -28,7 +28,8 @@ typedef int fs_param_type(struct p_log *, */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, - fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid; + fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid, + fs_param_is_file_or_string; /* * Specification of the type of value a parameter wants. @@ -133,6 +134,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +#define fsparam_file_or_string(NAME, OPT) \ + __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL) #define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) #define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h index df25fffdc0ae..623ccfcbf39c 100644 --- a/include/linux/fsl/enetc_mdio.h +++ b/include/linux/fsl/enetc_mdio.h @@ -59,7 +59,8 @@ static inline int enetc_mdio_read_c45(struct mii_bus *bus, int phy_id, static inline int enetc_mdio_write_c45(struct mii_bus *bus, int phy_id, int devad, int regnum, u16 value) { return -EINVAL; } -struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs) +static inline struct enetc_hw *enetc_hw_alloc(struct device *dev, + void __iomem *port_regs) { return ERR_PTR(-EINVAL); } #endif diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 083c860fd28e..c90ec889bfc2 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -436,7 +436,7 @@ void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, u16 if_id); -extern struct bus_type fsl_mc_bus_type; +extern const struct bus_type fsl_mc_bus_type; extern struct device_type fsl_mc_bus_dprc_type; extern struct device_type fsl_mc_bus_dpni_type; diff --git a/include/linux/fsl/netc_global.h b/include/linux/fsl/netc_global.h new file mode 100644 index 000000000000..fdecca8c90f0 --- /dev/null +++ b/include/linux/fsl/netc_global.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2024 NXP + */ +#ifndef __NETC_GLOBAL_H +#define __NETC_GLOBAL_H + +#include <linux/io.h> + +static inline u32 netc_read(void __iomem *reg) +{ + return ioread32(reg); +} + +static inline void netc_write(void __iomem *reg, u32 val) +{ + iowrite32(val, reg); +} + +#endif diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8be029bc50b1..3ecf7768e577 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -217,7 +217,6 @@ struct fsnotify_group { #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ #define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */ -#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */ int flags; unsigned int owner_flags; /* stored flags of mark_mutex owner */ @@ -268,22 +267,19 @@ struct fsnotify_group { static inline void fsnotify_group_lock(struct fsnotify_group *group) { mutex_lock(&group->mark_mutex); - if (group->flags & FSNOTIFY_GROUP_NOFS) - group->owner_flags = memalloc_nofs_save(); + group->owner_flags = memalloc_nofs_save(); } static inline void fsnotify_group_unlock(struct fsnotify_group *group) { - if (group->flags & FSNOTIFY_GROUP_NOFS) - memalloc_nofs_restore(group->owner_flags); + memalloc_nofs_restore(group->owner_flags); mutex_unlock(&group->mark_mutex); } static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) { WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); - if (group->flags & FSNOTIFY_GROUP_NOFS) - WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); + WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); } /* When calling fsnotify tell it if the data is a path or inode */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fd5e84d0ec47..aa9ddd1e4bb6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -113,14 +113,54 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER -extern int ftrace_enabled; +#include <linux/ftrace_regs.h> -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +extern int ftrace_enabled; +/** + * ftrace_regs - ftrace partial/optimal register set + * + * ftrace_regs represents a group of registers which is used at the + * function entry and exit. There are three types of registers. + * + * - Registers for passing the parameters to callee, including the stack + * pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64) + * - Registers for passing the return values to caller. + * (e.g. rax and rdx on x86_64) + * - Registers for hooking the function call and return including the + * frame pointer (the frame pointer is architecture/config dependent) + * (e.g. rip, rbp and rsp for x86_64) + * + * Also, architecture dependent fields can be used for internal process. + * (e.g. orig_ax on x86_64) + * + * On the function entry, those registers will be restored except for + * the stack pointer, so that user can change the function parameters + * and instruction pointer (e.g. live patching.) + * On the function exit, only registers which is used for return values + * are restored. + * + * NOTE: user *must not* access regs directly, only do it via APIs, because + * the member can be changed according to the architecture. + * This is why the structure is empty here, so that nothing accesses + * the ftrace_regs directly. + */ struct ftrace_regs { - struct pt_regs regs; + /* Nothing to see here, use the accessor functions! */ }; -#define arch_ftrace_get_regs(fregs) (&(fregs)->regs) + +#define ftrace_regs_size() sizeof(struct __arch_ftrace_regs) + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +/* + * Architectures that define HAVE_DYNAMIC_FTRACE_WITH_ARGS must define their own + * arch_ftrace_get_regs() where it only returns pt_regs *if* it is fully + * populated. It should return NULL otherwise. + */ +static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) +{ + return &arch_ftrace_regs(fregs)->regs; +} /* * ftrace_regs_set_instruction_pointer() is to be defined by the architecture @@ -150,23 +190,6 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS -#define ftrace_regs_get_instruction_pointer(fregs) \ - instruction_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_get_argument(fregs, n) \ - regs_get_kernel_argument(ftrace_get_regs(fregs), n) -#define ftrace_regs_get_stack_pointer(fregs) \ - kernel_stack_pointer(ftrace_get_regs(fregs)) -#define ftrace_regs_return_value(fregs) \ - regs_return_value(ftrace_get_regs(fregs)) -#define ftrace_regs_set_return_value(fregs, ret) \ - regs_set_return_value(ftrace_get_regs(fregs), ret) -#define ftrace_override_function_with_return(fregs) \ - override_function_with_return(ftrace_get_regs(fregs)) -#define ftrace_regs_query_register_offset(name) \ - regs_query_register_offset(name) -#endif - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -1015,6 +1038,17 @@ struct ftrace_graph_ent { } __packed; /* + * Structure that defines an entry function trace with retaddr. + * It's already packed but the attribute "packed" is needed + * to remove extra padding at the end. + */ +struct fgraph_retaddr_ent { + unsigned long func; /* Current function */ + int depth; + unsigned long retaddr; /* Return address */ +} __packed; + +/* * Structure that defines a return function trace. * It's already packed but the attribute "packed" is needed * to remove extra padding at the end. @@ -1039,7 +1073,8 @@ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, struct fgraph_ops *); /* entry */ -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1055,6 +1090,7 @@ struct fgraph_ops { void *fgraph_reserve_data(int idx, int size_bytes); void *fgraph_retrieve_data(int idx, int *size_bytes); +void *fgraph_retrieve_parent_data(int idx, int *size_bytes, int depth); /* * Stack of return addresses for functions @@ -1064,10 +1100,6 @@ void *fgraph_retrieve_data(int idx, int *size_bytes); struct ftrace_ret_stack { unsigned long ret; unsigned long func; - unsigned long long calltime; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long long subtime; -#endif #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif @@ -1087,6 +1119,7 @@ function_graph_enter(unsigned long ret, unsigned long func, struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); +unsigned long ftrace_graph_top_ret_addr(struct task_struct *task); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h new file mode 100644 index 000000000000..be1ed0c891d0 --- /dev/null +++ b/include/linux/ftrace_regs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_FTRACE_REGS_H +#define _LINUX_FTRACE_REGS_H + +/* + * For archs that just copy pt_regs in ftrace regs, it can use this default. + * If an architecture does not use pt_regs, it must define all the below + * accessor functions. + */ +#ifndef HAVE_ARCH_FTRACE_REGS +struct __arch_ftrace_regs { + struct pt_regs regs; +}; + +#define arch_ftrace_regs(fregs) ((struct __arch_ftrace_regs *)(fregs)) + +struct ftrace_regs; + +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(&arch_ftrace_regs(fregs)->regs) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(&arch_ftrace_regs(fregs)->regs, n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(&arch_ftrace_regs(fregs)->regs) +#define ftrace_regs_get_return_value(fregs) \ + regs_return_value(&arch_ftrace_regs(fregs)->regs) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(&arch_ftrace_regs(fregs)->regs, ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(&arch_ftrace_regs(fregs)->regs) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) + +#endif /* HAVE_ARCH_FTRACE_REGS */ + +#endif /* _LINUX_FTRACE_REGS_H */ diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h index 3ff4c277296f..9bd605b87c4c 100644 --- a/include/linux/fw_table.h +++ b/include/linux/fw_table.h @@ -54,7 +54,7 @@ int cdat_table_parse(enum acpi_cdat_type type, #define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_ACPI_LIB(x) #define __init_or_fwtbl_lib __init_or_acpilib #else -#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, CXL) +#define EXPORT_SYMBOL_FWTBL_LIB(x) EXPORT_SYMBOL_NS_GPL(x, "CXL") #define __init_or_fwtbl_lib #endif diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index f3512fddf3d7..5b51c3d582d6 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -41,6 +41,7 @@ #include <linux/limits.h> #include <linux/log2.h> #include <linux/math.h> +#include <linux/slab.h> #include <linux/types.h> struct genradix_root; @@ -48,10 +49,63 @@ struct genradix_root; #define GENRADIX_NODE_SHIFT 9 #define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) +#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *)) +#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY) + +/* depth that's needed for a genradix that can address up to ULONG_MAX: */ +#define GENRADIX_MAX_DEPTH \ + DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT) + +#define GENRADIX_DEPTH_MASK \ + ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1)) + +static inline int genradix_depth_shift(unsigned depth) +{ + return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth; +} + +/* + * Returns size (of data, in bytes) that a tree of a given depth holds: + */ +static inline size_t genradix_depth_size(unsigned depth) +{ + return 1UL << genradix_depth_shift(depth); +} + +static inline unsigned genradix_root_to_depth(struct genradix_root *r) +{ + return (unsigned long) r & GENRADIX_DEPTH_MASK; +} + +static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r) +{ + return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK); +} + struct __genradix { struct genradix_root *root; }; +struct genradix_node { + union { + /* Interior node: */ + struct genradix_node *children[GENRADIX_ARY]; + + /* Leaf: */ + u8 data[GENRADIX_NODE_SIZE]; + }; +}; + +static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask) +{ + return kzalloc(GENRADIX_NODE_SIZE, gfp_mask); +} + +static inline void genradix_free_node(struct genradix_node *node) +{ + kfree(node); +} + /* * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: */ @@ -128,6 +182,30 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) +static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offset) +{ + struct genradix_root *r = READ_ONCE(radix->root); + struct genradix_node *n = genradix_root_to_node(r); + unsigned level = genradix_root_to_depth(r); + unsigned shift = genradix_depth_shift(level); + + if (unlikely(ilog2(offset) >= genradix_depth_shift(level))) + return NULL; + + while (n && shift > GENRADIX_NODE_SHIFT) { + shift -= GENRADIX_ARY_SHIFT; + n = n->children[offset >> shift]; + offset &= (1UL << shift) - 1; + } + + return n ? &n->data[offset] : NULL; +} + +#define genradix_ptr_inlined(_radix, _idx) \ + (__genradix_cast(_radix) \ + __genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx))) + void *__genradix_ptr(struct __genradix *, size_t); /** @@ -142,7 +220,24 @@ void *__genradix_ptr(struct __genradix *, size_t); __genradix_ptr(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx))) -void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); +void *__genradix_ptr_alloc(struct __genradix *, size_t, + struct genradix_node **, gfp_t); + +#define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \ + (__genradix_cast(_radix) \ + (__genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx)) ?: \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + NULL, _gfp))) + +#define genradix_ptr_alloc_preallocated_inlined(_radix, _idx, _new_node, _gfp)\ + (__genradix_cast(_radix) \ + (__genradix_ptr_inlined(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx)) ?: \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _new_node, _gfp))) /** * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it @@ -157,7 +252,13 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); (__genradix_cast(_radix) \ __genradix_ptr_alloc(&(_radix)->tree, \ __genradix_idx_to_offset(_radix, _idx), \ - _gfp)) + NULL, _gfp)) + +#define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp)\ + (__genradix_cast(_radix) \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _new_node, _gfp)) struct genradix_iter { size_t offset; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f53f76e0b17e..b0fe9f62d15b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -306,7 +306,7 @@ struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, bool hugepage); + unsigned long addr); #else static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { @@ -319,14 +319,14 @@ static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order } static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { - return __folio_alloc_node(gfp, order, numa_node_id()); + return __folio_alloc_node_noprof(gfp, order, numa_node_id()); } static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) { return folio_alloc_noprof(gfp, order); } -#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \ +#define vma_alloc_folio_noprof(gfp, order, vma, addr) \ folio_alloc_noprof(gfp, order) #endif @@ -341,7 +341,7 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde static inline struct page *alloc_page_vma_noprof(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { - struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false); + struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr); return &folio->page; } @@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -struct page_frag_cache; -void page_frag_cache_drain(struct page_frag_cache *nc); -extern void __page_frag_cache_drain(struct page *page, unsigned int count); -void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, - gfp_t gfp_mask, unsigned int align_mask); - -static inline void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align) -{ - WARN_ON_ONCE(!is_power_of_2(align)); - return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); -} - -static inline void *page_frag_alloc(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask) -{ - return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); -} - -extern void page_frag_free(void *addr); - #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) @@ -446,4 +424,27 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_ #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); +#ifdef CONFIG_CONTIG_ALLOC +static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + struct page *page; + + if (WARN_ON(!order || !(gfp & __GFP_COMP))) + return NULL; + + page = alloc_contig_pages_noprof(1 << order, gfp, nid, node); + + return page ? page_folio(page) : NULL; +} +#else +static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, + int nid, nodemask_t *node) +{ + return NULL; +} +#endif +/* This should be paired with folio_put() rather than free_contig_range(). */ +#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) + #endif /* __LINUX_GFP_H */ diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 313be4ad79fd..65db9349f905 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -215,7 +215,8 @@ enum { * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these - * implicit rules. + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. * * %__GFP_NORETRY: The VM implementation will try only very lightweight * memory direct reclaim to get some memory under memory pressure (thus @@ -246,11 +247,14 @@ enum { * cannot handle allocation failures. The allocation could block * indefinitely but will never return with failure. Testing for * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. * New users should be evaluated carefully (and the flag should be * used only when there is no reasonable failure policy) but it is * definitely preferable to use the flag rather than opencode endless * loop around allocator. - * Using this flag for costly allocations is _highly_ discouraged. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 063f71b18a7c..6270150f4e29 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -17,18 +17,9 @@ struct device; /* make these flag values available regardless of GPIO kconfig options */ -#define GPIOF_DIR_OUT (0 << 0) -#define GPIOF_DIR_IN (1 << 0) - -#define GPIOF_INIT_LOW (0 << 1) -#define GPIOF_INIT_HIGH (1 << 1) - -#define GPIOF_IN (GPIOF_DIR_IN) -#define GPIOF_OUT_INIT_LOW (GPIOF_DIR_OUT | GPIOF_INIT_LOW) -#define GPIOF_OUT_INIT_HIGH (GPIOF_DIR_OUT | GPIOF_INIT_HIGH) - -/* Gpio pin is active-low */ -#define GPIOF_ACTIVE_LOW (1 << 2) +#define GPIOF_IN ((1 << 0)) +#define GPIOF_OUT_INIT_LOW ((0 << 0) | (0 << 1)) +#define GPIOF_OUT_INIT_HIGH ((0 << 0) | (1 << 1)) /** * struct gpio - a structure describing a GPIO with configuration diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 3bb87bf6bc65..455f855bc084 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -59,6 +59,15 @@ enum hdmi_infoframe_type { #define HDMI_DRM_INFOFRAME_SIZE 26 #define HDMI_VENDOR_INFOFRAME_SIZE 4 +/* + * HDMI 1.3a table 5-14 states that the largest InfoFrame_length is 27, + * not including the packet header or checksum byte. We include the + * checksum byte in HDMI_INFOFRAME_HEADER_SIZE, so this should allow + * HDMI_INFOFRAME_SIZE(MAX) to be the largest buffer we could ever need + * for any HDMI infoframe. + */ +#define HDMI_MAX_INFOFRAME_SIZE 27 + #define HDMI_INFOFRAME_SIZE(type) \ (HDMI_INFOFRAME_HEADER_SIZE + HDMI_ ## type ## _INFOFRAME_SIZE) diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 6730ee900ee1..8a03d9696b1c 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -30,6 +30,8 @@ #define HID_USAGE_SENSOR_PROX 0x200011 #define HID_USAGE_SENSOR_DATA_PRESENCE 0x2004b0 #define HID_USAGE_SENSOR_HUMAN_PRESENCE 0x2004b1 +#define HID_USAGE_SENSOR_HUMAN_PROXIMITY 0x2004b2 +#define HID_USAGE_SENSOR_HUMAN_ATTENTION 0x2004bd /* Pressure (200031) */ #define HID_USAGE_SENSOR_PRESSURE 0x200031 diff --git a/include/linux/hid.h b/include/linux/hid.h index 1533c9dcd3a6..d11e9c9a5f15 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -46,7 +46,7 @@ struct hid_item { __s16 s16; __u32 u32; __s32 s32; - __u8 *longdata; + const __u8 *longdata; } data; }; @@ -359,6 +359,7 @@ struct hid_item { * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: * | @HID_QUIRK_HAVE_SPECIAL_DRIVER: * | @HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE: + * | @HID_QUIRK_IGNORE_SPECIAL_DRIVER * | @HID_QUIRK_FULLSPEED_INTERVAL: * | @HID_QUIRK_NO_INIT_REPORTS: * | @HID_QUIRK_NO_IGNORE: @@ -384,6 +385,7 @@ struct hid_item { #define HID_QUIRK_HAVE_SPECIAL_DRIVER BIT(19) #define HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE BIT(20) #define HID_QUIRK_NOINVERT BIT(21) +#define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) #define HID_QUIRK_FULLSPEED_INTERVAL BIT(28) #define HID_QUIRK_NO_INIT_REPORTS BIT(29) #define HID_QUIRK_NO_IGNORE BIT(30) @@ -599,15 +601,17 @@ enum hid_battery_status { struct hid_driver; struct hid_ll_driver; -struct hid_device { /* device report descriptor */ - __u8 *dev_rdesc; - unsigned dev_rsize; - __u8 *rdesc; - unsigned rsize; +struct hid_device { + const __u8 *dev_rdesc; /* device report descriptor */ + const __u8 *bpf_rdesc; /* bpf modified report descriptor, if any */ + const __u8 *rdesc; /* currently used report descriptor */ + unsigned int dev_rsize; + unsigned int bpf_rsize; + unsigned int rsize; + unsigned int collection_size; /* Number of allocated hid_collections */ struct hid_collection *collection; /* List of HID collections */ - unsigned collection_size; /* Number of allocated hid_collections */ - unsigned maxcollection; /* Number of parsed collections */ - unsigned maxapplication; /* Number of applications */ + unsigned int maxcollection; /* Number of parsed collections */ + unsigned int maxapplication; /* Number of applications */ __u16 bus; /* BUS ID */ __u16 group; /* Report group */ __u32 vendor; /* Vendor ID */ @@ -822,7 +826,7 @@ struct hid_driver { struct hid_usage *usage, __s32 value); void (*report)(struct hid_device *hdev, struct hid_report *report); - __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, + const __u8 *(*report_fixup)(struct hid_device *hdev, __u8 *buf, unsigned int *size); int (*input_mapping)(struct hid_device *hdev, @@ -940,6 +944,8 @@ extern void hidinput_report_event(struct hid_device *hid, struct hid_report *rep extern int hidinput_connect(struct hid_device *hid, unsigned int force); extern void hidinput_disconnect(struct hid_device *); +struct hid_field *hid_find_field(struct hid_device *hdev, unsigned int report_type, + unsigned int application, unsigned int usage); int hid_set_field(struct hid_field *, unsigned, __s32); int hid_input_report(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt); @@ -953,7 +959,7 @@ struct hid_device *hid_allocate_device(void); struct hid_report *hid_register_report(struct hid_device *device, enum hid_report_type type, unsigned int id, unsigned int application); -int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); +int hid_parse_report(struct hid_device *hid, const __u8 *start, unsigned size); struct hid_report *hid_validate_values(struct hid_device *hid, enum hid_report_type type, unsigned int id, unsigned int field_index, @@ -972,7 +978,6 @@ const struct hid_device_id *hid_match_device(struct hid_device *hdev, struct hid_driver *hdrv); bool hid_compare_device_paths(struct hid_device *hdev_a, struct hid_device *hdev_b, char separator); -s32 hid_snto32(__u32 value, unsigned n); __u32 hid_field_extract(const struct hid_device *hid, __u8 *report, unsigned offset, unsigned n); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index d4d063cf63b5..a2e47dbcf82c 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -78,7 +78,7 @@ struct hid_ops { const struct bus_type *bus_type; }; -extern struct hid_ops *hid_ops; +extern const struct hid_ops *hid_ops; /** * struct hid_bpf_ops - A BPF struct_ops of callbacks allowing to attach HID-BPF @@ -212,7 +212,7 @@ int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); int hid_bpf_device_init(struct hid_device *hid); -u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); +const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, @@ -228,13 +228,8 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; } -/* - * This specialized allocator has to be a macro for its allocations to be - * accounted separately (to have a separate alloc_tag). The typecast is - * intentional to enforce typesafety. - */ -#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ - ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) +static inline const u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, const u8 *rdesc, + unsigned int *size) { return rdesc; } #endif /* CONFIG_HID_BPF */ diff --git a/include/linux/hidraw.h b/include/linux/hidraw.h index cd67f4ca5599..18fd30a288de 100644 --- a/include/linux/hidraw.h +++ b/include/linux/hidraw.h @@ -32,6 +32,7 @@ struct hidraw_list { struct hidraw *hidraw; struct list_head node; struct mutex read_mutex; + bool revoked; }; #ifdef CONFIG_HIDRAW diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 930a591b9b61..6e452bd8e7e3 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -224,13 +224,7 @@ static inline struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { - struct folio *folio; - - folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false); - if (folio) - clear_user_highpage(&folio->page, vaddr); - - return folio; + return vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr); } #endif diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 9d7754ad5e9b..6dbd0d49628f 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -229,6 +229,12 @@ struct hisi_qm_status { struct hisi_qm; +enum acc_err_result { + ACC_ERR_NONE, + ACC_ERR_NEED_RESET, + ACC_ERR_RECOVERED, +}; + struct hisi_qm_err_info { char *acpi_rst; u32 msi_wr_port; @@ -257,9 +263,9 @@ struct hisi_qm_err_ini { void (*close_axi_master_ooo)(struct hisi_qm *qm); void (*open_sva_prefetch)(struct hisi_qm *qm); void (*close_sva_prefetch)(struct hisi_qm *qm); - void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts); void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); + enum acc_err_result (*get_err_result)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { @@ -274,13 +280,25 @@ struct hisi_qm_cap_info { u32 v3_val; }; +struct hisi_qm_cap_query_info { + u32 type; + const char *name; + u32 offset; + u32 v1_val; + u32 v2_val; + u32 v3_val; +}; + struct hisi_qm_cap_record { u32 type; + const char *name; u32 cap_val; }; struct hisi_qm_cap_tables { + u32 qm_cap_size; struct hisi_qm_cap_record *qm_cap_table; + u32 dev_cap_size; struct hisi_qm_cap_record *dev_cap_table; }; @@ -436,37 +454,6 @@ struct hisi_qp { struct uacce_queue *uacce_q; }; -static inline int q_num_set(const char *val, const struct kernel_param *kp, - unsigned int device) -{ - struct pci_dev *pdev; - u32 n, q_num; - int ret; - - if (!val) - return -EINVAL; - - pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); - if (!pdev) { - q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); - pr_info("No device found currently, suppose queue number is %u\n", - q_num); - } else { - if (pdev->revision == QM_HW_V1) - q_num = QM_QNUM_V1; - else - q_num = QM_QNUM_V2; - - pci_dev_put(pdev); - } - - ret = kstrtou32(val, 10, &n); - if (ret || n < QM_MIN_QNUM || n > q_num) - return -EINVAL; - - return param_set_int(val, kp); -} - static inline int vfs_num_set(const char *val, const struct kernel_param *kp) { u32 n; @@ -526,6 +513,8 @@ static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_ mutex_unlock(&qm_list->lock); } +int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp, + unsigned int device); int hisi_qm_init(struct hisi_qm *qm); void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); @@ -583,6 +572,9 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset); u32 hisi_qm_get_hw_info(struct hisi_qm *qm, const struct hisi_qm_cap_info *info_table, u32 index, bool is_read); +u32 hisi_qm_get_cap_value(struct hisi_qm *qm, + const struct hisi_qm_cap_query_info *info_table, + u32 index, bool is_read); int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs, u32 dev_algs_size); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 9c8119ed13a4..9fa9c30a34e6 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -14,12 +14,17 @@ enum host1x_class { HOST1X_CLASS_HOST1X = 0x1, + HOST1X_CLASS_NVJPG1 = 0x7, + HOST1X_CLASS_NVENC = 0x21, + HOST1X_CLASS_NVENC1 = 0x22, HOST1X_CLASS_GR2D = 0x51, HOST1X_CLASS_GR2D_SB = 0x52, HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, + HOST1X_CLASS_NVJPG = 0xC0, HOST1X_CLASS_NVDEC = 0xF0, HOST1X_CLASS_NVDEC1 = 0xF5, + HOST1X_CLASS_OFA = 0xF8, }; struct host1x; @@ -466,6 +471,7 @@ struct host1x_memory_context { refcount_t ref; struct pid *owner; + struct device_dma_parameters dma_parms; struct device dev; u64 dma_mask; u32 stream_id; diff --git a/include/linux/host1x_context_bus.h b/include/linux/host1x_context_bus.h index 72462737a6db..c928cb432680 100644 --- a/include/linux/host1x_context_bus.h +++ b/include/linux/host1x_context_bus.h @@ -9,7 +9,7 @@ #include <linux/device.h> #ifdef CONFIG_TEGRA_HOST1X_CONTEXT_BUS -extern struct bus_type host1x_context_device_bus_type; +extern const struct bus_type host1x_context_device_bus_type; #endif #endif diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index aa1e65ccb615..7ef5f7ef31a9 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -228,32 +228,17 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) /* Initialize timers: */ extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, enum hrtimer_mode mode); -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, - enum hrtimer_mode mode); +extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t clock_id, enum hrtimer_mode mode); +extern void hrtimer_setup_on_stack(struct hrtimer *timer, + enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t clock_id, enum hrtimer_mode mode); +extern void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, + enum hrtimer_mode mode); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); -extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, - clockid_t clock_id, - enum hrtimer_mode mode); - extern void destroy_hrtimer_on_stack(struct hrtimer *timer); #else -static inline void hrtimer_init_on_stack(struct hrtimer *timer, - clockid_t which_clock, - enum hrtimer_mode mode) -{ - hrtimer_init(timer, which_clock, mode); -} - -static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, - clockid_t clock_id, - enum hrtimer_mode mode) -{ - hrtimer_init_sleeper(sl, clock_id, mode); -} - static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } #endif @@ -337,6 +322,28 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) return timer->base->running == timer; } +/** + * hrtimer_update_function - Update the timer's callback function + * @timer: Timer to update + * @function: New callback function + * + * Only safe to call if the timer is not enqueued. Can be called in the callback function if the + * timer is not enqueued at the same time (see the comments above HRTIMER_STATE_ENQUEUED). + */ +static inline void hrtimer_update_function(struct hrtimer *timer, + enum hrtimer_restart (*function)(struct hrtimer *)) +{ + guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock); + + if (WARN_ON_ONCE(hrtimer_is_queued(timer))) + return; + + if (WARN_ON_ONCE(!function)) + return; + + timer->function = function; +} + /* Forward a hrtimer so it expires after now: */ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index e25d9ebfdf89..b94c2e8ee918 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -2,7 +2,6 @@ #ifndef _LINUX_HUGE_MM_H #define _LINUX_HUGE_MM_H -#include <linux/sched/coredump.h> #include <linux/mm_types.h> #include <linux/fs.h> /* only for vma_is_dax() */ @@ -76,9 +75,9 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; /* * Mask of all large folio orders supported for file THP. Folios in a DAX * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to - * it. + * it. Same to PFNMAPs where there's neither page* nor pagecache. */ -#define THP_ORDERS_ALL_FILE_DAX \ +#define THP_ORDERS_ALL_SPECIAL \ (BIT(PMD_ORDER) | BIT(PUD_ORDER)) #define THP_ORDERS_ALL_FILE_DEFAULT \ ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) @@ -87,7 +86,7 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; * Mask of all large folio orders supported for THP. */ #define THP_ORDERS_ALL \ - (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT) + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_SPECIAL | THP_ORDERS_ALL_FILE_DEFAULT) #define TVA_SMAPS (1 << 0) /* Will be used for procfs */ #define TVA_IN_PF (1 << 1) /* Page fault handler */ @@ -96,6 +95,8 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) +#define split_folio(f) split_folio_to_list(f, NULL) + #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PUD_SHIFT PUD_SHIFT @@ -114,6 +115,55 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) +enum mthp_stat_item { + MTHP_STAT_ANON_FAULT_ALLOC, + MTHP_STAT_ANON_FAULT_FALLBACK, + MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_ZSWPOUT, + MTHP_STAT_SWPIN, + MTHP_STAT_SWPOUT, + MTHP_STAT_SWPOUT_FALLBACK, + MTHP_STAT_SHMEM_ALLOC, + MTHP_STAT_SHMEM_FALLBACK, + MTHP_STAT_SHMEM_FALLBACK_CHARGE, + MTHP_STAT_SPLIT, + MTHP_STAT_SPLIT_FAILED, + MTHP_STAT_SPLIT_DEFERRED, + MTHP_STAT_NR_ANON, + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, + __MTHP_STAT_COUNT +}; + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS) +struct mthp_stat { + unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; +}; + +DECLARE_PER_CPU(struct mthp_stat, mthp_stats); + +static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta) +{ + if (order <= 0 || order > PMD_ORDER) + return; + + this_cpu_add(mthp_stats.stats[order][item], delta); +} + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + mod_mthp_stat(order, item, 1); +} + +#else +static inline void mod_mthp_stat(int order, enum mthp_stat_item item, int delta) +{ +} + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ +} +#endif + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern unsigned long transparent_hugepage_flags; @@ -204,19 +254,6 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, return orders; } -static inline bool file_thp_enabled(struct vm_area_struct *vma) -{ - struct inode *inode; - - if (!vma->vm_file) - return false; - - inode = vma->vm_file->f_inode; - - return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) && - !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode); -} - unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long vm_flags, unsigned long tva_flags, @@ -269,44 +306,27 @@ struct thpsize { #define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj) -enum mthp_stat_item { - MTHP_STAT_ANON_FAULT_ALLOC, - MTHP_STAT_ANON_FAULT_FALLBACK, - MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, - MTHP_STAT_SWPOUT, - MTHP_STAT_SWPOUT_FALLBACK, - MTHP_STAT_SHMEM_ALLOC, - MTHP_STAT_SHMEM_FALLBACK, - MTHP_STAT_SHMEM_FALLBACK_CHARGE, - MTHP_STAT_SPLIT, - MTHP_STAT_SPLIT_FAILED, - MTHP_STAT_SPLIT_DEFERRED, - __MTHP_STAT_COUNT -}; - -struct mthp_stat { - unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; -}; - -#ifdef CONFIG_SYSFS -DECLARE_PER_CPU(struct mthp_stat, mthp_stats); +#define transparent_hugepage_use_zero_page() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) -static inline void count_mthp_stat(int order, enum mthp_stat_item item) +static inline bool vma_thp_disabled(struct vm_area_struct *vma, + unsigned long vm_flags) { - if (order <= 0 || order > PMD_ORDER) - return; - - this_cpu_inc(mthp_stats.stats[order][item]); + /* + * Explicitly disabled through madvise or prctl, or some + * architectures may disable THP for some mappings, for + * example, s390 kvm. + */ + return (vm_flags & VM_NOHUGEPAGE) || + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags); } -#else -static inline void count_mthp_stat(int order, enum mthp_stat_item item) + +static inline bool thp_disabled_by_hw(void) { + /* If the hardware/firmware marked hugepage support disabled. */ + return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED); } -#endif - -#define transparent_hugepage_use_zero_page() \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); @@ -314,14 +334,29 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); -bool can_split_folio(struct folio *folio, int *pextra_pins); +bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins); int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); +int min_order_for_split(struct folio *folio); +int split_folio_to_list(struct folio *folio, struct list_head *list); static inline int split_huge_page(struct page *page) { - return split_huge_page_to_list_to_order(page, NULL, 0); + struct folio *folio = page_folio(page); + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + /* + * split_huge_page() locks the page before splitting and + * expects the same page that has been split to be locked when + * returned. split_folio(page_folio(page)) cannot be used here + * because it converts the page to folio and passes the head + * page to be split. + */ + return split_huge_page_to_list_to_order(page, NULL, ret); } -void deferred_split_folio(struct folio *folio); +void deferred_split_folio(struct folio *folio, bool partially_mapped); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); @@ -342,6 +377,17 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +int change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pudp, unsigned long addr, pgprot_t newprot, + unsigned long cp_flags); +#else +static inline int +change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pudp, unsigned long addr, pgprot_t newprot, + unsigned long cp_flags) { return 0; } +#endif + #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ @@ -410,11 +456,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd); } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); @@ -470,7 +511,7 @@ thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, } static inline bool -can_split_folio(struct folio *folio, int *pextra_pins) +can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) { return false; } @@ -484,7 +525,13 @@ static inline int split_huge_page(struct page *page) { return 0; } -static inline void deferred_split_folio(struct folio *folio) {} + +static inline int split_folio_to_list(struct folio *folio, struct list_head *list) +{ + return 0; +} + +static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) @@ -555,11 +602,6 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return false; } -static inline bool is_huge_zero_pud(pud_t pud) -{ - return false; -} - static inline void mm_put_huge_zero_folio(struct mm_struct *mm) { return; @@ -585,6 +627,19 @@ static inline int next_order(unsigned long *orders, int prev) { return 0; } + +static inline void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, + unsigned long address) +{ +} + +static inline int change_huge_pud(struct mmu_gather *tlb, + struct vm_area_struct *vma, pud_t *pudp, + unsigned long addr, pgprot_t newprot, + unsigned long cp_flags) +{ + return 0; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, @@ -598,7 +653,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order) return split_folio_to_list_to_order(folio, NULL, new_order); } -#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) -#define split_folio(f) split_folio_to_order(f, 0) - #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 45bf05ad5c53..ae4fe8615bb6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -127,9 +127,6 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); -struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, - unsigned long address, unsigned int flags, - unsigned int *page_mask); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); @@ -549,16 +546,10 @@ static inline struct hstate *hstate_inode(struct inode *i) } #endif /* !CONFIG_HUGETLBFS */ -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); -#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ - unsigned long -generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* * huegtlb page specific state flags. These flags are located in page.private @@ -695,6 +686,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); +struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask); + int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -899,10 +893,11 @@ static inline bool hugepage_movable_supported(struct hstate *h) /* Movability of hugepages depends on migration support. */ static inline gfp_t htlb_alloc_mask(struct hstate *h) { - if (hugepage_movable_supported(h)) - return GFP_HIGHUSER_MOVABLE; - else - return GFP_HIGHUSER; + gfp_t gfp = __GFP_COMP | __GFP_NOWARN; + + gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; + + return gfp; } static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) @@ -1034,9 +1029,19 @@ void hugetlb_unregister_node(struct node *node); */ bool is_raw_hwpoison_page_in_hugepage(struct page *page); +static inline unsigned long huge_page_mask_align(struct file *file) +{ + return PAGE_MASK & ~huge_page_mask(hstate_file(file)); +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline unsigned long huge_page_mask_align(struct file *file) +{ + return 0; +} + static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) { return NULL; @@ -1062,6 +1067,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, } static inline struct folio * +alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid, + nodemask_t *nmask, gfp_t gfp_mask) +{ + return NULL; +} + +static inline struct folio * alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback) @@ -1251,7 +1263,7 @@ static inline __init void hugetlb_cma_reserve(int order) } #endif -#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING static inline bool hugetlb_pmd_shared(pte_t *pte) { return page_count(virt_to_page(pte)) > 1; @@ -1287,8 +1299,7 @@ bool __vma_private_lock(struct vm_area_struct *vma); static inline pte_t * hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) { -#if defined(CONFIG_HUGETLB_PAGE) && \ - defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) +#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP) struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; /* diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index e94314760aab..3a63dff62d03 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -368,7 +368,9 @@ enum hwmon_intrusion_attributes { /** * struct hwmon_ops - hwmon device operations - * @is_visible: Callback to return attribute visibility. Mandatory. + * @visible: Static visibility. If non-zero, 'is_visible' is ignored. + * @is_visible: Callback to return attribute visibility. Mandatory unless + * 'visible' is non-zero. * Parameters are: * @const void *drvdata: * Pointer to driver-private data structure passed @@ -412,6 +414,7 @@ enum hwmon_intrusion_attributes { * The function returns 0 on success or a negative error number. */ struct hwmon_ops { + umode_t visible; umode_t (*is_visible)(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int channel); int (*read)(struct device *dev, enum hwmon_sensor_types type, @@ -481,7 +484,6 @@ devm_hwmon_device_register_with_info(struct device *dev, const struct attribute_group **extra_groups); void hwmon_device_unregister(struct device *dev); -void devm_hwmon_device_unregister(struct device *dev); int hwmon_notify_event(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel); diff --git a/include/linux/i2c-of-prober.h b/include/linux/i2c-of-prober.h new file mode 100644 index 000000000000..bb6d47f50ee5 --- /dev/null +++ b/include/linux/i2c-of-prober.h @@ -0,0 +1,140 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Definitions for the Linux I2C OF component prober + * + * Copyright (C) 2024 Google LLC + */ + +#ifndef _LINUX_I2C_OF_PROBER_H +#define _LINUX_I2C_OF_PROBER_H + +#include <linux/kconfig.h> +#include <linux/types.h> + +struct device; +struct device_node; + +/** + * struct i2c_of_probe_ops - I2C OF component prober callbacks + * + * A set of callbacks to be used by i2c_of_probe_component(). + * + * All callbacks are optional. Callbacks are called only once per run, and are + * used in the order they are defined in this structure. + * + * All callbacks that have return values shall return %0 on success, + * or a negative error number on failure. + * + * The @dev parameter passed to the callbacks is the same as @dev passed to + * i2c_of_probe_component(). It should only be used for dev_printk() calls + * and nothing else, especially not managed device resource (devres) APIs. + */ +struct i2c_of_probe_ops { + /** + * @enable: Retrieve and enable resources so that the components respond to probes. + * + * It is OK for this callback to return -EPROBE_DEFER since the intended use includes + * retrieving resources and enables them. Resources should be reverted to their initial + * state and released before returning if this fails. + */ + int (*enable)(struct device *dev, struct device_node *bus_node, void *data); + + /** + * @cleanup_early: Release exclusive resources prior to calling probe() on a + * detected component. + * + * Only called if a matching component is actually found. If none are found, + * resources that would have been released in this callback should be released in + * @free_resourcs_late instead. + */ + void (*cleanup_early)(struct device *dev, void *data); + + /** + * @cleanup: Opposite of @enable to balance refcounts and free resources after probing. + * + * Should check if resources were already freed by @cleanup_early. + */ + void (*cleanup)(struct device *dev, void *data); +}; + +/** + * struct i2c_of_probe_cfg - I2C OF component prober configuration + * @ops: Callbacks for the prober to use. + * @type: A string to match the device node name prefix to probe for. + */ +struct i2c_of_probe_cfg { + const struct i2c_of_probe_ops *ops; + const char *type; +}; + +#if IS_ENABLED(CONFIG_OF_DYNAMIC) + +int i2c_of_probe_component(struct device *dev, const struct i2c_of_probe_cfg *cfg, void *ctx); + +/** + * DOC: I2C OF component prober simple helpers + * + * Components such as trackpads are commonly connected to a devices baseboard + * with a 6-pin ribbon cable. That gives at most one voltage supply and one + * GPIO (commonly a "enable" or "reset" line) besides the I2C bus, interrupt + * pin, and common ground. Touchscreens, while integrated into the display + * panel's connection, typically have the same set of connections. + * + * A simple set of helpers are provided here for use with the I2C OF component + * prober. This implementation targets such components, allowing for at most + * one regulator supply. + * + * The following helpers are provided: + * * i2c_of_probe_simple_enable() + * * i2c_of_probe_simple_cleanup_early() + * * i2c_of_probe_simple_cleanup() + */ + +/** + * struct i2c_of_probe_simple_opts - Options for simple I2C component prober callbacks + * @res_node_compatible: Compatible string of device node to retrieve resources from. + * @supply_name: Name of regulator supply. + * @gpio_name: Name of GPIO. NULL if no GPIO line is used. Empty string ("") if GPIO + * line is unnamed. + * @post_power_on_delay_ms: Delay after regulators are powered on. Passed to msleep(). + * @post_gpio_config_delay_ms: Delay after GPIO is configured. Passed to msleep(). + * @gpio_assert_to_enable: %true if GPIO should be asserted, i.e. set to logical high, + * to enable the component. + * + * This describes power sequences common for the class of components supported by the + * simple component prober: + * * @gpio_name is configured to the non-active setting according to @gpio_assert_to_enable. + * * @supply_name regulator supply is enabled. + * * Wait for @post_power_on_delay_ms to pass. + * * @gpio_name is configured to the active setting according to @gpio_assert_to_enable. + * * Wait for @post_gpio_config_delay_ms to pass. + */ +struct i2c_of_probe_simple_opts { + const char *res_node_compatible; + const char *supply_name; + const char *gpio_name; + unsigned int post_power_on_delay_ms; + unsigned int post_gpio_config_delay_ms; + bool gpio_assert_to_enable; +}; + +struct gpio_desc; +struct regulator; + +struct i2c_of_probe_simple_ctx { + /* public: provided by user before helpers are used. */ + const struct i2c_of_probe_simple_opts *opts; + /* private: internal fields for helpers. */ + struct regulator *supply; + struct gpio_desc *gpiod; +}; + +int i2c_of_probe_simple_enable(struct device *dev, struct device_node *bus_node, void *data); +void i2c_of_probe_simple_cleanup_early(struct device *dev, void *data); +void i2c_of_probe_simple_cleanup(struct device *dev, void *data); + +extern struct i2c_of_probe_ops i2c_of_probe_simple_ops; + +#endif /* IS_ENABLED(CONFIG_OF_DYNAMIC) */ + +#endif /* _LINUX_I2C_OF_PROBER_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 377def497298..388ce71a29a9 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -761,6 +761,9 @@ struct i2c_adapter { struct regulator *bus_regulator; struct dentry *debugfs; + + /* 7bit address space */ + DECLARE_BITMAP(addrs_in_instantiation, 1 << 7); }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 074f632868d9..12d532b012c5 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -278,13 +278,29 @@ enum i3c_bus_mode { }; /** + * enum i3c_open_drain_speed - I3C open-drain speed + * @I3C_OPEN_DRAIN_SLOW_SPEED: Slow open-drain speed for sending the first + * broadcast address. The first broadcast address at this speed + * will be visible to all devices on the I3C bus. I3C devices + * working in I2C mode will turn off their spike filter when + * switching into I3C mode. + * @I3C_OPEN_DRAIN_NORMAL_SPEED: Normal open-drain speed in I3C bus mode. + */ +enum i3c_open_drain_speed { + I3C_OPEN_DRAIN_SLOW_SPEED, + I3C_OPEN_DRAIN_NORMAL_SPEED, +}; + +/** * enum i3c_addr_slot_status - I3C address slot status * @I3C_ADDR_SLOT_FREE: address is free * @I3C_ADDR_SLOT_RSVD: address is reserved * @I3C_ADDR_SLOT_I2C_DEV: address is assigned to an I2C device * @I3C_ADDR_SLOT_I3C_DEV: address is assigned to an I3C device * @I3C_ADDR_SLOT_STATUS_MASK: address slot mask - * + * @I3C_ADDR_SLOT_EXT_STATUS_MASK: address slot mask with extended information + * @I3C_ADDR_SLOT_EXT_DESIRED: the bitmask represents addresses that are preferred by some devices, + * such as the "assigned-address" property in a device tree source. * On an I3C bus, addresses are assigned dynamically, and we need to know which * addresses are free to use and which ones are already assigned. * @@ -297,8 +313,12 @@ enum i3c_addr_slot_status { I3C_ADDR_SLOT_I2C_DEV, I3C_ADDR_SLOT_I3C_DEV, I3C_ADDR_SLOT_STATUS_MASK = 3, + I3C_ADDR_SLOT_EXT_STATUS_MASK = 7, + I3C_ADDR_SLOT_EXT_DESIRED = BIT(2), }; +#define I3C_ADDR_SLOT_STATUS_BITS 4 + /** * struct i3c_bus - I3C bus object * @cur_master: I3C master currently driving the bus. Since I3C is multi-master @@ -340,7 +360,7 @@ enum i3c_addr_slot_status { struct i3c_bus { struct i3c_dev_desc *cur_master; int id; - unsigned long addrslots[((I2C_MAX_ADDR + 1) * 2) / BITS_PER_LONG]; + unsigned long addrslots[((I2C_MAX_ADDR + 1) * I3C_ADDR_SLOT_STATUS_BITS) / BITS_PER_LONG]; enum i3c_bus_mode mode; struct { unsigned long i3c; @@ -436,6 +456,7 @@ struct i3c_bus { * NULL. * @enable_hotjoin: enable hot join event detect. * @disable_hotjoin: disable hot join event detect. + * @set_speed: adjust I3C open drain mode timing. */ struct i3c_master_controller_ops { int (*bus_init)(struct i3c_master_controller *master); @@ -464,6 +485,7 @@ struct i3c_master_controller_ops { struct i3c_ibi_slot *slot); int (*enable_hotjoin)(struct i3c_master_controller *master); int (*disable_hotjoin)(struct i3c_master_controller *master); + int (*set_speed)(struct i3c_master_controller *master, enum i3c_open_drain_speed speed); }; /** diff --git a/include/linux/i8253.h b/include/linux/i8253.h index 8336b2f6f834..56c280eb2d4f 100644 --- a/include/linux/i8253.h +++ b/include/linux/i8253.h @@ -21,9 +21,9 @@ #define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ) extern raw_spinlock_t i8253_lock; -extern bool i8253_clear_counter_on_shutdown; extern struct clock_event_device i8253_clockevent; extern void clockevent_i8253_init(bool oneshot); +extern void clockevent_i8253_disable(void); extern void setup_pit_timer(void); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 30cef3b940eb..05dedc45505c 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -20,7 +20,7 @@ #include <linux/etherdevice.h> #include <linux/bitfield.h> #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* * DS bit usage @@ -1445,6 +1445,8 @@ struct ieee80211_mgmt { __le16 status; __le16 capab; __le16 timeout; + /* followed by BA Extension */ + u8 variable[]; } __packed addba_resp; struct{ u8 action_code; diff --git a/include/linux/if_ltalk.h b/include/linux/if_ltalk.h deleted file mode 100644 index 4cc1c0b77870..000000000000 --- a/include/linux/if_ltalk.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_LTALK_H -#define __LINUX_LTALK_H - -#include <uapi/linux/if_ltalk.h> - -extern struct net_device *alloc_ltalkdev(int sizeof_priv); -#endif diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h index 839d1e48b85e..c44bf6e80ecb 100644 --- a/include/linux/if_rmnet.h +++ b/include/linux/if_rmnet.h @@ -42,7 +42,7 @@ struct rmnet_map_ul_csum_header { /* csum_info field: * OFFSET: where (offset in bytes) to insert computed checksum - * UDP: 1 = UDP checksum (zero checkum means no checksum) + * UDP: 1 = UDP checksum (zero checksum means no checksum) * ENABLED: 1 = checksum computation requested */ #define MAP_CSUM_UL_OFFSET_MASK GENMASK(13, 0) diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 8099759d7242..10be00f3b120 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -3,6 +3,7 @@ #define _IIO_BACKEND_H_ #include <linux/types.h> +#include <linux/iio/iio.h> struct iio_chan_spec; struct fwnode_handle; @@ -13,15 +14,19 @@ struct iio_dev; enum iio_backend_data_type { IIO_BACKEND_TWOS_COMPLEMENT, IIO_BACKEND_OFFSET_BINARY, + IIO_BACKEND_DATA_UNSIGNED, IIO_BACKEND_DATA_TYPE_MAX }; enum iio_backend_data_source { - IIO_BACKEND_INTERNAL_CONTINUOS_WAVE, + IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE, IIO_BACKEND_EXTERNAL, + IIO_BACKEND_INTERNAL_RAMP_16BIT, IIO_BACKEND_DATA_SOURCE_MAX }; +#define iio_backend_debugfs_ptr(ptr) PTR_IF(IS_ENABLED(CONFIG_DEBUG_FS), ptr) + /** * IIO_BACKEND_EX_INFO - Helper for an IIO extended channel attribute * @_name: Attribute name @@ -54,6 +59,8 @@ enum iio_backend_test_pattern { IIO_BACKEND_NO_TEST_PATTERN, /* modified prbs9 */ IIO_BACKEND_ADI_PRBS_9A = 32, + /* modified prbs23 */ + IIO_BACKEND_ADI_PRBS_23A, IIO_BACKEND_TEST_PATTERN_MAX }; @@ -81,6 +88,14 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @read_raw: Read a channel attribute from a backend device + * @debugfs_print_chan_status: Print channel status into a buffer. + * @debugfs_reg_access: Read or write register value of backend. + * @ddr_enable: Enable interface DDR (Double Data Rate) mode. + * @ddr_disable: Disable interface DDR (Double Data Rate) mode. + * @data_stream_enable: Enable data stream. + * @data_stream_disable: Disable data stream. + * @data_transfer_addr: Set data address. **/ struct iio_backend_ops { int (*enable)(struct iio_backend *back); @@ -113,11 +128,36 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*read_raw)(struct iio_backend *back, + struct iio_chan_spec const *chan, int *val, int *val2, + long mask); + int (*debugfs_print_chan_status)(struct iio_backend *back, + unsigned int chan, char *buf, + size_t len); + int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg, + unsigned int writeval, unsigned int *readval); + int (*ddr_enable)(struct iio_backend *back); + int (*ddr_disable)(struct iio_backend *back); + int (*data_stream_enable)(struct iio_backend *back); + int (*data_stream_disable)(struct iio_backend *back); + int (*data_transfer_addr)(struct iio_backend *back, u32 address); +}; + +/** + * struct iio_backend_info - info structure for an iio_backend + * @name: Backend name. + * @ops: Backend operations. + */ +struct iio_backend_info { + const char *name; + const struct iio_backend_ops *ops; }; int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); +int iio_backend_enable(struct iio_backend *back); +void iio_backend_disable(struct iio_backend *back); int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, const struct iio_backend_data_fmt *data); int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan, @@ -136,22 +176,51 @@ int iio_backend_data_sample_trigger(struct iio_backend *back, int devm_iio_backend_request_buffer(struct device *dev, struct iio_backend *back, struct iio_dev *indio_dev); +int iio_backend_ddr_enable(struct iio_backend *back); +int iio_backend_ddr_disable(struct iio_backend *back); +int iio_backend_data_stream_enable(struct iio_backend *back); +int iio_backend_data_stream_disable(struct iio_backend *back); +int iio_backend_data_transfer_addr(struct iio_backend *back, u32 address); ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, const char *buf, size_t len); ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); - -int iio_backend_extend_chan_spec(struct iio_dev *indio_dev, - struct iio_backend *back, +int iio_backend_read_raw(struct iio_backend *back, + struct iio_chan_spec const *chan, int *val, int *val2, + long mask); +int iio_backend_extend_chan_spec(struct iio_backend *back, struct iio_chan_spec *chan); void *iio_backend_get_priv(const struct iio_backend *conv); struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); +struct iio_backend *devm_iio_backend_fwnode_get(struct device *dev, + const char *name, + struct fwnode_handle *fwnode); struct iio_backend * __devm_iio_backend_get_from_fwnode_lookup(struct device *dev, struct fwnode_handle *fwnode); int devm_iio_backend_register(struct device *dev, - const struct iio_backend_ops *ops, void *priv); + const struct iio_backend_info *info, void *priv); + +static inline int iio_backend_read_scale(struct iio_backend *back, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + return iio_backend_read_raw(back, chan, val, val2, IIO_CHAN_INFO_SCALE); +} + +static inline int iio_backend_read_offset(struct iio_backend *back, + struct iio_chan_spec const *chan, + int *val, int *val2) +{ + return iio_backend_read_raw(back, chan, val, val2, + IIO_CHAN_INFO_OFFSET); +} +ssize_t iio_backend_debugfs_print_chan_status(struct iio_backend *back, + unsigned int chan, char *buf, + size_t len); +void iio_backend_debugfs_add(struct iio_backend *back, + struct iio_dev *indio_dev); #endif diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h index 7a157ed218f6..7f8b55551ed0 100644 --- a/include/linux/iio/driver.h +++ b/include/linux/iio/driver.h @@ -18,7 +18,7 @@ struct iio_map; * @map: array of mappings specifying association of channel with client */ int iio_map_array_register(struct iio_dev *indio_dev, - struct iio_map *map); + const struct iio_map *map); /** * iio_map_array_unregister() - tell the core to remove consumer mappings for @@ -38,6 +38,7 @@ int iio_map_array_unregister(struct iio_dev *indio_dev); * handle de-registration of the IIO map object when the device's refcount goes to * zero. */ -int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps); +int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, + const struct iio_map *maps); #endif diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h index a4558c45a548..72062a0c7c87 100644 --- a/include/linux/iio/events.h +++ b/include/linux/iio/events.h @@ -10,7 +10,7 @@ #include <uapi/linux/iio/events.h> /** - * IIO_EVENT_CODE() - create event identifier + * _IIO_EVENT_CODE() - create event identifier * @chan_type: Type of the channel. Should be one of enum iio_chan_type. * @diff: Whether the event is for an differential channel or not. * @modifier: Modifier for the channel. Should be one of enum iio_modifier. @@ -19,10 +19,13 @@ * @chan: Channel number for non-differential channels. * @chan1: First channel number for differential channels. * @chan2: Second channel number for differential channels. + * + * Drivers should use the specialized macros below instead of using this one + * directly. */ -#define IIO_EVENT_CODE(chan_type, diff, modifier, direction, \ - type, chan, chan1, chan2) \ +#define _IIO_EVENT_CODE(chan_type, diff, modifier, direction, \ + type, chan, chan1, chan2) \ (((u64)type << 56) | ((u64)diff << 55) | \ ((u64)direction << 48) | ((u64)modifier << 40) | \ ((u64)chan_type << 32) | (((u16)chan2) << 16) | ((u16)chan1) | \ @@ -30,7 +33,8 @@ /** - * IIO_MOD_EVENT_CODE() - create event identifier for modified channels + * IIO_MOD_EVENT_CODE() - create event identifier for modified (non + * differential) channels * @chan_type: Type of the channel. Should be one of enum iio_chan_type. * @number: Channel number. * @modifier: Modifier for the channel. Should be one of enum iio_modifier. @@ -40,10 +44,11 @@ #define IIO_MOD_EVENT_CODE(chan_type, number, modifier, \ type, direction) \ - IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0) + _IIO_EVENT_CODE(chan_type, 0, modifier, direction, type, number, 0, 0) /** - * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified channels + * IIO_UNMOD_EVENT_CODE() - create event identifier for unmodified (non + * differential) channels * @chan_type: Type of the channel. Should be one of enum iio_chan_type. * @number: Channel number. * @type: Type of the event. Should be one of enum iio_event_type. @@ -51,6 +56,18 @@ */ #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction) \ - IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0) + _IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0) + +/** + * IIO_DIFF_EVENT_CODE() - create event identifier for differential channels + * @chan_type: Type of the channel. Should be one of enum iio_chan_type. + * @chan1: First channel number for differential channels. + * @chan2: Second channel number for differential channels. + * @type: Type of the event. Should be one of enum iio_event_type. + * @direction: Direction of the event. One of enum iio_event_direction. + */ + +#define IIO_DIFF_EVENT_CODE(chan_type, chan1, chan2, type, direction) \ + _IIO_EVENT_CODE(chan_type, 1, 0, direction, type, 0, chan1, chan2) #endif diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index 5aec3945555b..a89e7e43e441 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -70,7 +70,7 @@ struct iio_dev_opaque { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_dentry; - unsigned cached_reg_addr; + unsigned int cached_reg_addr; char read_buf[20]; unsigned int read_buf_len; #endif diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 894309294182..ae65890d4567 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -282,11 +282,11 @@ struct iio_chan_spec { const struct iio_chan_spec_ext_info *ext_info; const char *extend_name; const char *datasheet_name; - unsigned modified:1; - unsigned indexed:1; - unsigned output:1; - unsigned differential:1; - unsigned has_ext_scan_type:1; + unsigned int modified:1; + unsigned int indexed:1; + unsigned int output:1; + unsigned int differential:1; + unsigned int has_ext_scan_type:1; }; @@ -514,7 +514,7 @@ struct iio_info { const struct iio_chan_spec *chan, enum iio_event_type type, enum iio_event_direction dir, - int state); + bool state); int (*read_event_value)(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, @@ -541,13 +541,13 @@ struct iio_info { int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); int (*debugfs_reg_access)(struct iio_dev *indio_dev, - unsigned reg, unsigned writeval, - unsigned *readval); + unsigned int reg, unsigned int writeval, + unsigned int *readval); int (*fwnode_xlate)(struct iio_dev *indio_dev, const struct fwnode_reference_args *iiospec); - int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned val); + int (*hwfifo_set_watermark)(struct iio_dev *indio_dev, unsigned int val); int (*hwfifo_flush_to_buffer)(struct iio_dev *indio_dev, - unsigned count); + unsigned int count); }; /** @@ -609,7 +609,7 @@ struct iio_dev { int scan_bytes; const unsigned long *available_scan_masks; - unsigned masklength; + unsigned int __private masklength; const unsigned long *active_scan_mask; bool scan_timestamp; struct iio_trigger *trig; @@ -624,7 +624,7 @@ struct iio_dev { const struct iio_info *info; const struct iio_buffer_setup_ops *setup_ops; - void *priv; + void *__private priv; }; int iio_device_id(struct iio_dev *indio_dev); @@ -785,7 +785,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ static inline void *iio_priv(const struct iio_dev *indio_dev) { - return indio_dev->priv; + return ACCESS_PRIVATE(indio_dev, priv); } void iio_device_free(struct iio_dev *indio_dev); @@ -810,10 +810,28 @@ static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) } #endif +/** + * iio_device_suspend_triggering() - suspend trigger attached to an iio_dev + * @indio_dev: iio_dev associated with the device that will have triggers suspended + * + * Return 0 if successful, negative otherwise + **/ +int iio_device_suspend_triggering(struct iio_dev *indio_dev); + +/** + * iio_device_resume_triggering() - resume trigger attached to an iio_dev + * that was previously suspended with iio_device_suspend_triggering() + * @indio_dev: iio_dev associated with the device that will have triggers resumed + * + * Return 0 if successful, negative otherwise + **/ +int iio_device_resume_triggering(struct iio_dev *indio_dev); + #ifdef CONFIG_ACPI bool iio_read_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation, char *acpi_method); +const char *iio_get_acpi_device_name_and_data(struct device *dev, const void **data); #else static inline bool iio_read_acpi_mount_matrix(struct device *dev, struct iio_mount_matrix *orientation, @@ -821,7 +839,16 @@ static inline bool iio_read_acpi_mount_matrix(struct device *dev, { return false; } +static inline const char * +iio_get_acpi_device_name_and_data(struct device *dev, const void **data) +{ + return NULL; +} #endif +static inline const char *iio_get_acpi_device_name(struct device *dev) +{ + return iio_get_acpi_device_name_and_data(dev, NULL); +} /** * iio_get_current_scan_type - Get the current scan type for a channel @@ -855,6 +882,26 @@ static inline const struct iio_scan_type return &chan->scan_type; } +/** + * iio_get_masklength - Get length of the channels mask + * @indio_dev: the IIO device to get the masklength for + */ +static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev) +{ + return ACCESS_PRIVATE(indio_dev, masklength); +} + +int iio_active_scan_mask_index(struct iio_dev *indio_dev); + +/** + * iio_for_each_active_channel - Iterated over active channels + * @indio_dev: the IIO device + * @chan: Holds the index of the enabled channel + */ +#define iio_for_each_active_channel(indio_dev, chan) \ + for_each_set_bit((chan), (indio_dev)->active_scan_mask, \ + iio_get_masklength(indio_dev)) + ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index cb5280e6cc21..5730ba6b1cfa 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -141,7 +141,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ARP_EVICT_NOCARRIER) struct in_ifaddr { - struct hlist_node hash; + struct hlist_node addr_lst; struct in_ifaddr __rcu *ifa_next; struct in_device *ifa_dev; struct rcu_head rcu_head; @@ -226,6 +226,10 @@ static __inline__ bool bad_mask(__be32 mask, __be32 addr) for (ifa = rtnl_dereference((in_dev)->ifa_list); ifa; \ ifa = rtnl_dereference(ifa->ifa_next)) +#define in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) \ + for (ifa = rtnl_net_dereference(net, (in_dev)->ifa_list); ifa; \ + ifa = rtnl_net_dereference(net, ifa->ifa_next)) + #define in_dev_for_each_ifa_rcu(ifa, in_dev) \ for (ifa = rcu_dereference((in_dev)->ifa_list); ifa; \ ifa = rcu_dereference(ifa->ifa_next)) @@ -252,6 +256,11 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } +static inline struct in_device *__in_dev_get_rtnl_net(const struct net_device *dev) +{ + return rtnl_net_dereference(dev_net(dev), dev->ip_ptr); +} + /* called with rcu_read_lock or rtnl held */ static inline bool ip_ignore_linkdown(const struct net_device *dev) { diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h new file mode 100644 index 000000000000..b94beab64610 --- /dev/null +++ b/include/linux/intel_vsec.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _INTEL_VSEC_H +#define _INTEL_VSEC_H + +#include <linux/auxiliary_bus.h> +#include <linux/bits.h> + +#define VSEC_CAP_TELEMETRY BIT(0) +#define VSEC_CAP_WATCHER BIT(1) +#define VSEC_CAP_CRASHLOG BIT(2) +#define VSEC_CAP_SDSI BIT(3) +#define VSEC_CAP_TPMI BIT(4) + +/* Intel DVSEC offsets */ +#define INTEL_DVSEC_ENTRIES 0xA +#define INTEL_DVSEC_SIZE 0xB +#define INTEL_DVSEC_TABLE 0xC +#define INTEL_DVSEC_TABLE_BAR(x) ((x) & GENMASK(2, 0)) +#define INTEL_DVSEC_TABLE_OFFSET(x) ((x) & GENMASK(31, 3)) +#define TABLE_OFFSET_SHIFT 3 + +struct pci_dev; +struct resource; + +enum intel_vsec_id { + VSEC_ID_TELEMETRY = 2, + VSEC_ID_WATCHER = 3, + VSEC_ID_CRASHLOG = 4, + VSEC_ID_SDSI = 65, + VSEC_ID_TPMI = 66, +}; + +/** + * struct intel_vsec_header - Common fields of Intel VSEC and DVSEC registers. + * @rev: Revision ID of the VSEC/DVSEC register space + * @length: Length of the VSEC/DVSEC register space + * @id: ID of the feature + * @num_entries: Number of instances of the feature + * @entry_size: Size of the discovery table for each feature + * @tbir: BAR containing the discovery tables + * @offset: BAR offset of start of the first discovery table + */ +struct intel_vsec_header { + u8 rev; + u16 length; + u16 id; + u8 num_entries; + u8 entry_size; + u8 tbir; + u32 offset; +}; + +enum intel_vsec_quirks { + /* Watcher feature not supported */ + VSEC_QUIRK_NO_WATCHER = BIT(0), + + /* Crashlog feature not supported */ + VSEC_QUIRK_NO_CRASHLOG = BIT(1), + + /* Use shift instead of mask to read discovery table offset */ + VSEC_QUIRK_TABLE_SHIFT = BIT(2), + + /* DVSEC not present (provided in driver data) */ + VSEC_QUIRK_NO_DVSEC = BIT(3), + + /* Platforms requiring quirk in the auxiliary driver */ + VSEC_QUIRK_EARLY_HW = BIT(4), +}; + +/** + * struct pmt_callbacks - Callback infrastructure for PMT devices + * ->read_telem() when specified, called by client driver to access PMT data (instead + * of direct copy). + * @pdev: PCI device reference for the callback's use + * @guid: ID of data to acccss + * @data: buffer for the data to be copied + * @off: offset into the requested buffer + * @count: size of buffer + */ +struct pmt_callbacks { + int (*read_telem)(struct pci_dev *pdev, u32 guid, u64 *data, loff_t off, u32 count); +}; + +/** + * struct intel_vsec_platform_info - Platform specific data + * @parent: parent device in the auxbus chain + * @headers: list of headers to define the PMT client devices to create + * @priv_data: private data, usable by parent devices, currently a callback + * @caps: bitmask of PMT capabilities for the given headers + * @quirks: bitmask of VSEC device quirks + * @base_addr: allow a base address to be specified (rather than derived) + */ +struct intel_vsec_platform_info { + struct device *parent; + struct intel_vsec_header **headers; + void *priv_data; + unsigned long caps; + unsigned long quirks; + u64 base_addr; +}; + +/** + * struct intel_sec_device - Auxbus specific device information + * @auxdev: auxbus device struct for auxbus access + * @pcidev: pci device associated with the device + * @resource: any resources shared by the parent + * @ida: id reference + * @num_resources: number of resources + * @id: xarray id + * @priv_data: any private data needed + * @quirks: specified quirks + * @base_addr: base address of entries (if specified) + */ +struct intel_vsec_device { + struct auxiliary_device auxdev; + struct pci_dev *pcidev; + struct resource *resource; + struct ida *ida; + int num_resources; + int id; /* xa */ + void *priv_data; + size_t priv_data_size; + unsigned long quirks; + u64 base_addr; +}; + +int intel_vsec_add_aux(struct pci_dev *pdev, struct device *parent, + struct intel_vsec_device *intel_vsec_dev, + const char *name); + +static inline struct intel_vsec_device *dev_to_ivdev(struct device *dev) +{ + return container_of(dev, struct intel_vsec_device, auxdev.dev); +} + +static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device *auxdev) +{ + return container_of(auxdev, struct intel_vsec_device, auxdev); +} + +#if IS_ENABLED(CONFIG_INTEL_VSEC) +void intel_vsec_register(struct pci_dev *pdev, + struct intel_vsec_platform_info *info); +#else +static inline void intel_vsec_register(struct pci_dev *pdev, + struct intel_vsec_platform_info *info) +{ +} +#endif +#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 3f30c88e0b4c..8cd9327e4e78 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -276,7 +276,7 @@ struct irq_affinity_notify { #define IRQ_AFFINITY_MAX_SETS 4 /** - * struct irq_affinity - Description for automatic irq affinity assignements + * struct irq_affinity - Description for automatic irq affinity assignments * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of * the MSI(-X) vector space * @post_vectors: Don't apply affinity to @post_vectors at end of @@ -594,7 +594,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS]; struct softirq_action { - void (*action)(struct softirq_action *); + void (*action)(void); }; asmlinkage void do_softirq(void); @@ -609,13 +609,60 @@ static inline void do_softirq_post_smp_call_flush(unsigned int unused) } #endif -extern void open_softirq(int nr, void (*action)(struct softirq_action *)); +extern void open_softirq(int nr, void (*action)(void)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); +/* + * With forced-threaded interrupts enabled a raised softirq is deferred to + * ksoftirqd unless it can be handled within the threaded interrupt. This + * affects timer_list timers and hrtimers which are explicitly marked with + * HRTIMER_MODE_SOFT. + * With PREEMPT_RT enabled more hrtimers are moved to softirq for processing + * which includes all timers which are not explicitly marked HRTIMER_MODE_HARD. + * Userspace controlled timers (like the clock_nanosleep() interface) is divided + * into two categories: Tasks with elevated scheduling policy including + * SCHED_{FIFO|RR|DL} and the remaining scheduling policy. The tasks with the + * elevated scheduling policy are woken up directly from the HARDIRQ while all + * other wake ups are delayed to softirq and so to ksoftirqd. + * + * The ksoftirqd runs at SCHED_OTHER policy at which it should remain since it + * handles the softirq in an overloaded situation (not handled everything + * within its last run). + * If the timers are handled at SCHED_OTHER priority then they competes with all + * other SCHED_OTHER tasks for CPU resources are possibly delayed. + * Moving timers softirqs to a low priority SCHED_FIFO thread instead ensures + * that timer are performed before scheduling any SCHED_OTHER thread. + */ +DECLARE_PER_CPU(struct task_struct *, ktimerd); +DECLARE_PER_CPU(unsigned long, pending_timer_softirq); +void raise_ktimers_thread(unsigned int nr); + +static inline unsigned int local_timers_pending_force_th(void) +{ + return __this_cpu_read(pending_timer_softirq); +} + +static inline void raise_timer_softirq(unsigned int nr) +{ + lockdep_assert_in_irq(); + if (force_irqthreads()) + raise_ktimers_thread(nr); + else + __raise_softirq_irqoff(nr); +} + +static inline unsigned int local_timers_pending(void) +{ + if (force_irqthreads()) + return local_timers_pending_force_th(); + else + return local_softirq_pending(); +} + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index f9a81761bfce..ce86b09ae80f 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -87,6 +87,7 @@ struct io_pgtable_cfg { * attributes set in the TCR for a non-coherent page-table walker. * * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. + * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -95,6 +96,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) + #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; @@ -171,6 +173,10 @@ struct io_pgtable_cfg { u64 ttbr[4]; u32 n_ttbrs; } apple_dart_cfg; + + struct { + int nid; + } amd; }; }; diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 447fbfd32215..0d5448c0b86c 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -23,6 +23,15 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) return sqe->cmd; } +static inline void io_uring_cmd_private_sz_check(size_t cmd_sz) +{ + BUILD_BUG_ON(cmd_sz > sizeof_field(struct io_uring_cmd, pdu)); +} +#define io_uring_cmd_to_pdu(cmd, pdu_type) ( \ + io_uring_cmd_private_sz_check(sizeof(pdu_type)), \ + ((pdu_type *)&(cmd)->pdu) \ +) + #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); @@ -34,7 +43,7 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, * Note: the caller should never hard code @issue_flags and is only allowed * to pass the mask provided by the core io_uring code. */ -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, u64 res2, unsigned issue_flags); void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -48,6 +57,9 @@ void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags); +/* Execute the request from a blocking context */ +void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); + #else static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) @@ -55,7 +67,7 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, return -EOPNOTSUPP; } static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2, unsigned issue_flags) + u64 ret2, unsigned issue_flags) { } static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, @@ -67,6 +79,9 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags) { } +static inline void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd) +{ +} #endif /* @@ -95,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd) { - return cmd_to_io_kiocb(cmd)->task; + return cmd_to_io_kiocb(cmd)->tctx->task; } #endif /* _LINUX_IO_URING_CMD_H */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3315005df117..011860ade268 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -37,6 +37,7 @@ enum io_uring_cmd_flags { /* set when uring wants to cancel a previously issued command */ IO_URING_F_CANCEL = (1 << 11), IO_URING_F_COMPAT = (1 << 12), + IO_URING_F_TASK_DEAD = (1 << 13), }; struct io_wq_work_node { @@ -55,19 +56,18 @@ struct io_wq_work { int cancel_seq; }; -struct io_fixed_file { - /* file * with additional FFS_* flags */ - unsigned long file_ptr; +struct io_rsrc_data { + unsigned int nr; + struct io_rsrc_node **nodes; }; struct io_file_table { - struct io_fixed_file *files; + struct io_rsrc_data data; unsigned long *bitmap; unsigned int alloc_hint; }; struct io_hash_bucket { - spinlock_t lock; struct hlist_head list; } ____cacheline_aligned_in_smp; @@ -76,6 +76,12 @@ struct io_hash_table { unsigned hash_bits; }; +struct io_mapped_region { + struct page **pages; + void *vmap_ptr; + size_t nr_pages; +}; + /* * Arbitrary limit, can be raised if need be */ @@ -85,6 +91,7 @@ struct io_uring_task { /* submission side */ int cached_refs; const struct io_ring_ctx *last; + struct task_struct *task; struct io_wq *io_wq; struct file *registered_rings[IO_RINGFD_REG_MAX]; @@ -239,6 +246,9 @@ struct io_ring_ctx { struct io_rings *rings; struct percpu_ref refs; + clockid_t clockid; + enum tk_offsets clock_offset; + enum task_work_notify_mode notify_method; unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; @@ -267,7 +277,6 @@ struct io_ring_ctx { * Fixed resources fast path, should be accessed only under * uring_lock, and updated through io_uring_register(2) */ - struct io_rsrc_node *rsrc_node; atomic_t cancel_seq; /* @@ -280,15 +289,13 @@ struct io_ring_ctx { struct io_wq_work_list iopoll_list; struct io_file_table file_table; - struct io_mapped_ubuf **user_bufs; - unsigned nr_user_files; - unsigned nr_user_bufs; + struct io_rsrc_data buf_table; struct io_submit_state submit_state; struct xarray io_bl_xa; - struct io_hash_table cancel_table_locked; + struct io_hash_table cancel_table; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; struct io_alloc_cache rw_cache; @@ -299,6 +306,11 @@ struct io_ring_ctx { * ->uring_cmd() by io_uring_cmd_insert_cancelable() */ struct hlist_head cancelable_uring_cmd; + /* + * For Hybrid IOPOLL, runtime in hybrid polling, without + * scheduling time + */ + u64 hybrid_poll_time; } ____cacheline_aligned_in_smp; struct { @@ -313,6 +325,9 @@ struct io_ring_ctx { unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; unsigned cq_extra; + + void *cq_wait_arg; + size_t cq_wait_size; } ____cacheline_aligned_in_smp; /* @@ -321,6 +336,7 @@ struct io_ring_ctx { */ struct { struct llist_head work_llist; + struct llist_head retry_llist; unsigned long check_cq; atomic_t cq_wait_nr; atomic_t cq_timeouts; @@ -339,7 +355,6 @@ struct io_ring_ctx { struct list_head io_buffers_comp; struct list_head cq_overflow_list; - struct io_hash_table cancel_table; struct hlist_head waitid_list; @@ -363,16 +378,6 @@ struct io_ring_ctx { struct wait_queue_head poll_wq; struct io_restriction restrictions; - /* slow path rsrc auxilary data, used by update/register */ - struct io_rsrc_data *file_data; - struct io_rsrc_data *buf_data; - - /* protected by ->uring_lock */ - struct list_head rsrc_ref_list; - struct io_alloc_cache rsrc_node_cache; - struct wait_queue_head rsrc_quiesce_wq; - unsigned rsrc_quiesce; - u32 pers_next; struct xarray personalities; @@ -406,7 +411,7 @@ struct io_ring_ctx { /* napi busy poll default timeout */ ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; - bool napi_enabled; + u8 napi_track_mode; DECLARE_HASHTABLE(napi_ht, 4); #endif @@ -415,6 +420,13 @@ struct io_ring_ctx { unsigned evfd_last_cq_tail; /* + * Protection for resize vs mmap races - both the mmap and resize + * side will need to grab this lock, to prevent either side from + * being run concurrently with the other. + */ + struct mutex resize_lock; + + /* * If IORING_SETUP_NO_MMAP is used, then the below holds * the gup'ed pages for the two rings, and the sqes. */ @@ -422,6 +434,9 @@ struct io_ring_ctx { unsigned short n_sqe_pages; struct page **ring_pages; struct page **sqe_pages; + + /* used for optimised request parameter and wait argument passing */ + struct io_mapped_region param_region; }; struct io_tw_state { @@ -444,6 +459,7 @@ enum { REQ_F_LINK_TIMEOUT_BIT, REQ_F_NEED_CLEANUP_BIT, REQ_F_POLLED_BIT, + REQ_F_HYBRID_IOPOLL_STATE_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_BUFFER_RING_BIT, REQ_F_REISSUE_BIT, @@ -456,7 +472,6 @@ enum { REQ_F_DOUBLE_POLL_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, - REQ_F_HASH_LOCKED_BIT, /* keep async read/write and isreg together and in order */ REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, @@ -465,6 +480,7 @@ enum { REQ_F_BL_EMPTY_BIT, REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, + REQ_F_BUF_NODE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -503,6 +519,8 @@ enum { REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), + /* every req only blocks once in hybrid poll */ + REQ_F_IOPOLL_STATE = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT), /* buffer already selected */ REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), /* buffer selected from ring, needs commit */ @@ -531,8 +549,6 @@ enum { REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), - /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ - REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), /* file is pollable */ @@ -543,6 +559,8 @@ enum { REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), /* buffer ring head needs incrementing on put */ REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), + /* buf node is valid */ + REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -612,12 +630,9 @@ struct io_kiocb { struct io_cqe cqe; struct io_ring_ctx *ctx; - struct task_struct *task; + struct io_uring_task *tctx; union { - /* store used ubuf, so we can prevent reloading */ - struct io_mapped_ubuf *imu; - /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ struct io_buffer *kbuf; @@ -626,6 +641,8 @@ struct io_kiocb { * REQ_F_BUFFER_RING is set. */ struct io_buffer_list *buf_list; + + struct io_rsrc_node *buf_node; }; union { @@ -635,13 +652,20 @@ struct io_kiocb { __poll_t apoll_events; }; - struct io_rsrc_node *rsrc_node; + struct io_rsrc_node *file_node; atomic_t refs; bool cancel_seq_set; struct io_task_work io_task_work; - /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ - struct hlist_node hash_node; + union { + /* + * for polled requests, i.e. IORING_OP_POLL_ADD and async armed + * poll + */ + struct hlist_node hash_node; + /* For IOPOLL setup queues, with hybrid polling */ + u64 iopoll_start; + }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; /* opcode allocated if it needs to store data for async defer */ @@ -664,4 +688,9 @@ struct io_overflow_cqe { struct io_uring_cqe cqe; }; +static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx) +{ + return ctx->flags & IORING_SETUP_CQE32; +} + #endif diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6fc1c858013d..5675af6b740c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -53,6 +53,9 @@ struct vm_fault; * * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent * rather than a file data extent. + * + * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must + * never be merged with the mapping before it. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -64,6 +67,7 @@ struct vm_fault; #define IOMAP_F_BUFFER_HEAD 0 #endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) +#define IOMAP_F_BOUNDARY (1U << 6) /* * Flags set by the core iomap code during operations: @@ -178,6 +182,7 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ +#define IOMAP_ATOMIC (1 << 9) struct iomap_ops { /* @@ -256,12 +261,41 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) return &i->iomap; } -ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, - const struct iomap_ops *ops); -int iomap_file_buffered_write_punch_delalloc(struct inode *inode, - struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, - int (*punch)(struct inode *inode, loff_t pos, loff_t length)); +/* + * Return the file offset for the first unchanged block after a short write. + * + * If nothing was written, round @pos down to point at the first block in + * the range, else round up to include the partially written block. + */ +static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos, + ssize_t written) +{ + if (unlikely(!written)) + return round_down(pos, i_blocksize(inode)); + return round_up(pos + written, i_blocksize(inode)); +} + +/* + * Check if the range needs to be unshared for a FALLOC_FL_UNSHARE_RANGE + * operation. + * + * Don't bother with blocks that are not shared to start with; or mappings that + * cannot be shared, such as inline data, delalloc reservations, holes or + * unwritten extents. + * + * Note that we use srcmap directly instead of iomap_iter_srcmap as unsharing + * requires providing a separate source map, and the presence of one is a good + * indicator that unsharing is needed, unlike IOMAP_F_SHARED which can be set + * for any data that goes into the COW fork for XFS. + */ +static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) +{ + return (iter->iomap.flags & IOMAP_F_SHARED) && + iter->srcmap.type == IOMAP_MAPPED; +} +ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, + const struct iomap_ops *ops, void *private); int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); @@ -277,6 +311,13 @@ int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); + +typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, + struct iomap *iomap); +void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, + loff_t end_byte, unsigned flags, struct iomap *iomap, + iomap_punch_t punch); + int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, const struct iomap_ops *ops); loff_t iomap_seek_hole(struct inode *inode, loff_t offset, diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h new file mode 100644 index 000000000000..508beaa44c39 --- /dev/null +++ b/include/linux/iommu-dma.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved + * + * DMA operations that map physical memory through IOMMU. + */ +#ifndef _LINUX_IOMMU_DMA_H +#define _LINUX_IOMMU_DMA_H + +#include <linux/dma-direction.h> + +#ifdef CONFIG_IOMMU_DMA +static inline bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} +#else +static inline bool use_dma_iommu(struct device *dev) +{ + return false; +} +#endif /* CONFIG_IOMMU_DMA */ + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs); +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +unsigned long iommu_dma_get_merge_boundary(struct device *dev); +size_t iommu_dma_opt_mapping_size(void); +size_t iommu_dma_max_mapping_size(struct device *dev); +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void *iommu_dma_vmap_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt); +#define iommu_dma_vunmap_noncontiguous(dev, vaddr) \ + vunmap(vaddr); +int iommu_dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, + size_t size, struct sg_table *sgt); +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); + +#endif /* _LINUX_IOMMU_DMA_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd722f473635..318d27841130 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -42,6 +42,8 @@ struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; struct iommu_fault_param; +struct iommufd_ctx; +struct iommufd_viommu; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -491,7 +493,9 @@ static inline int __iommu_copy_struct_from_user_array( * @index: Index to the location in the array to copy user data from * @min_last: The last member of the data structure @kdst points in the * initial version. - * Return 0 for success, otherwise -error. + * + * Copy a single entry from a user array. Return 0 for success, otherwise + * -error. */ #define iommu_copy_struct_from_user_array(kdst, user_array, data_type, index, \ min_last) \ @@ -500,6 +504,50 @@ static inline int __iommu_copy_struct_from_user_array( offsetofend(typeof(*(kdst)), min_last)) /** + * iommu_copy_struct_from_full_user_array - Copy iommu driver specific user + * space data from an iommu_user_data_array + * @kdst: Pointer to an iommu driver specific user data that is defined in + * include/uapi/linux/iommufd.h + * @kdst_entry_size: sizeof(*kdst) + * @user_array: Pointer to a struct iommu_user_data_array for a user space + * array + * @data_type: The data type of the @kdst. Must match with @user_array->type + * + * Copy the entire user array. kdst must have room for kdst_entry_size * + * user_array->entry_num bytes. Return 0 for success, otherwise -error. + */ +static inline int +iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, + struct iommu_user_data_array *user_array, + unsigned int data_type) +{ + unsigned int i; + int ret; + + if (user_array->type != data_type) + return -EINVAL; + if (!user_array->entry_num) + return -EINVAL; + if (likely(user_array->entry_len == kdst_entry_size)) { + if (copy_from_user(kdst, user_array->uptr, + user_array->entry_num * + user_array->entry_len)) + return -EFAULT; + } + + /* Copy item by item */ + for (i = 0; i != user_array->entry_num; i++) { + ret = copy_struct_from_user( + kdst + kdst_entry_size * i, kdst_entry_size, + user_array->uptr + user_array->entry_len * i, + user_array->entry_len); + if (ret) + return ret; + } + return 0; +} + +/** * struct iommu_ops - iommu ops and capabilities * @capable: check capability * @hw_info: report iommu hardware information. The data buffer returned by this @@ -509,19 +557,19 @@ static inline int __iommu_copy_struct_from_user_array( * @domain_alloc: allocate and return an iommu domain if success. Otherwise * NULL is returned. The domain is not fully initialized until * the caller iommu_domain_alloc() returns. - * @domain_alloc_user: Allocate an iommu domain corresponding to the input - * parameters as defined in include/uapi/linux/iommufd.h. - * Unlike @domain_alloc, it is called only by IOMMUFD and - * must fully initialize the new domain before return. - * Upon success, if the @user_data is valid and the @parent - * points to a kernel-managed domain, the new domain must be - * IOMMU_DOMAIN_NESTED type; otherwise, the @parent must be - * NULL while the @user_data can be optionally provided, the - * new domain must support __IOMMU_DOMAIN_PAGING. - * Upon failure, ERR_PTR must be returned. + * @domain_alloc_paging_flags: Allocate an iommu domain corresponding to the + * input parameters as defined in + * include/uapi/linux/iommufd.h. The @user_data can be + * optionally provided, the new domain must support + * __IOMMU_DOMAIN_PAGING. Upon failure, ERR_PTR must be + * returned. * @domain_alloc_paging: Allocate an iommu_domain that can be used for - * UNMANAGED, DMA, and DMA_FQ domain types. + * UNMANAGED, DMA, and DMA_FQ domain types. This is the + * same as invoking domain_alloc_paging_flags() with + * @flags=0, @user_data=NULL. A driver should implement + * only one of the two ops. * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing. + * @domain_alloc_nested: Allocate an iommu_domain for nested translation. * @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -542,6 +590,14 @@ static inline int __iommu_copy_struct_from_user_array( * @remove_dev_pasid: Remove any translation configurations of a specific * pasid, so that any DMA transactions with this pasid * will be blocked by the hardware. + * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind + * the @dev, as the set of virtualization resources shared/passed + * to user space IOMMU instance. And associate it with a nesting + * @parent_domain. The @viommu_type must be defined in the header + * include/uapi/linux/iommufd.h + * It is required to call iommufd_viommu_alloc() helper for + * a bundled allocation of the core and the driver structures, + * using the given @ictx pointer. * @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops * @identity_domain: An always available, always attachable identity @@ -562,12 +618,15 @@ struct iommu_ops { /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); - struct iommu_domain *(*domain_alloc_user)( - struct device *dev, u32 flags, struct iommu_domain *parent, + struct iommu_domain *(*domain_alloc_paging_flags)( + struct device *dev, u32 flags, const struct iommu_user_data *user_data); struct iommu_domain *(*domain_alloc_paging)(struct device *dev); struct iommu_domain *(*domain_alloc_sva)(struct device *dev, struct mm_struct *mm); + struct iommu_domain *(*domain_alloc_nested)( + struct device *dev, struct iommu_domain *parent, u32 flags, + const struct iommu_user_data *user_data); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); @@ -591,6 +650,10 @@ struct iommu_ops { void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, struct iommu_domain *domain); + struct iommufd_viommu *(*viommu_alloc)( + struct device *dev, struct iommu_domain *parent_domain, + struct iommufd_ctx *ictx, unsigned int viommu_type); + const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; @@ -616,7 +679,8 @@ struct iommu_ops { * * EBUSY - device is attached to a domain and cannot be changed * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. Use is discouraged - * @set_dev_pasid: set an iommu domain to a pasid of device + * @set_dev_pasid: set or replace an iommu domain to a pasid of device. The pasid of + * the device should be left in the old config in error case. * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. * @unmap_pages: unmap a number of pages of the same size from an iommu domain @@ -635,14 +699,13 @@ struct iommu_ops { * @enforce_cache_coherency: Prevent any kind of DMA from bypassing IOMMU_CACHE, * including no-snoop TLPs on PCIe or other platform * specific mechanisms. - * @enable_nesting: Enable nesting * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*) * @free: Release the domain after use. */ struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, - ioasid_t pasid); + ioasid_t pasid, struct iommu_domain *old); int (*map_pages)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, @@ -663,7 +726,6 @@ struct iommu_domain_ops { dma_addr_t iova); bool (*enforce_cache_coherency)(struct iommu_domain *domain); - int (*enable_nesting)(struct iommu_domain *domain); int (*set_pgtable_quirks)(struct iommu_domain *domain, unsigned long quirks); @@ -784,12 +846,13 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) }; } -extern int bus_iommu_probe(const struct bus_type *bus); -extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); -extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); -struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); +struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, unsigned int flags); +static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + return iommu_paging_domain_alloc_flags(dev, 0); +} extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -844,7 +907,6 @@ extern void iommu_group_put(struct iommu_group *group); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); -int iommu_enable_nesting(struct iommu_domain *domain); int iommu_set_pgtable_quirks(struct iommu_domain *domain, unsigned long quirks); @@ -994,6 +1056,8 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* CANWBS is supported */ +#define IOMMU_FWSPEC_PCI_RC_CANWBS (1 << 1) /* * An iommu attach handle represents a relationship between an iommu domain @@ -1081,19 +1145,15 @@ struct iommu_iotlb_gather {}; struct iommu_dirty_bitmap {}; struct iommu_dirty_ops {}; -static inline bool iommu_present(const struct bus_type *bus) -{ - return false; -} - static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) { return false; } -static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) +static inline struct iommu_domain *iommu_paging_domain_alloc_flags(struct device *dev, + unsigned int flags) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index ffc3a949f837..11110c749200 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -6,17 +6,47 @@ #ifndef __LINUX_IOMMUFD_H #define __LINUX_IOMMUFD_H -#include <linux/types.h> -#include <linux/errno.h> #include <linux/err.h> +#include <linux/errno.h> +#include <linux/refcount.h> +#include <linux/types.h> +#include <linux/xarray.h> struct device; -struct iommufd_device; -struct page; -struct iommufd_ctx; -struct iommufd_access; struct file; struct iommu_group; +struct iommu_user_data; +struct iommu_user_data_array; +struct iommufd_access; +struct iommufd_ctx; +struct iommufd_device; +struct iommufd_viommu_ops; +struct page; + +enum iommufd_object_type { + IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, + IOMMUFD_OBJ_HWPT_PAGING, + IOMMUFD_OBJ_HWPT_NESTED, + IOMMUFD_OBJ_IOAS, + IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, + IOMMUFD_OBJ_VIOMMU, + IOMMUFD_OBJ_VDEVICE, +#ifdef CONFIG_IOMMUFD_TEST + IOMMUFD_OBJ_SELFTEST, +#endif + IOMMUFD_OBJ_MAX, +}; + +/* Base struct for all objects with a userspace ID handle. */ +struct iommufd_object { + refcount_t shortterm_users; + refcount_t users; + enum iommufd_object_type type; + unsigned int id; +}; struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); @@ -54,6 +84,45 @@ void iommufd_access_detach(struct iommufd_access *access); void iommufd_ctx_get(struct iommufd_ctx *ictx); +struct iommufd_viommu { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommu_device *iommu_dev; + struct iommufd_hwpt_paging *hwpt; + + const struct iommufd_viommu_ops *ops; + + struct xarray vdevs; + + unsigned int type; +}; + +/** + * struct iommufd_viommu_ops - vIOMMU specific operations + * @destroy: Clean up all driver-specific parts of an iommufd_viommu. The memory + * of the vIOMMU will be free-ed by iommufd core after calling this op + * @alloc_domain_nested: Allocate a IOMMU_DOMAIN_NESTED on a vIOMMU that holds a + * nesting parent domain (IOMMU_DOMAIN_PAGING). @user_data + * must be defined in include/uapi/linux/iommufd.h. + * It must fully initialize the new iommu_domain before + * returning. Upon failure, ERR_PTR must be returned. + * @cache_invalidate: Flush hardware cache used by a vIOMMU. It can be used for + * any IOMMU hardware specific cache: TLB and device cache. + * The @array passes in the cache invalidation requests, in + * form of a driver data structure. A driver must update the + * array->entry_num to report the number of handled requests. + * The data structure of the array entry must be defined in + * include/uapi/linux/iommufd.h + */ +struct iommufd_viommu_ops { + void (*destroy)(struct iommufd_viommu *viommu); + struct iommu_domain *(*alloc_domain_nested)( + struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data); + int (*cache_invalidate)(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array); +}; + #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); struct iommufd_ctx *iommufd_ctx_from_fd(int fd); @@ -111,4 +180,43 @@ static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) return -EOPNOTSUPP; } #endif /* CONFIG_IOMMUFD */ + +#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); +struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, + unsigned long vdev_id); +#else /* !CONFIG_IOMMUFD_DRIVER_CORE */ +static inline struct iommufd_object * +_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, + enum iommufd_object_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline struct device * +iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) +{ + return NULL; +} +#endif /* CONFIG_IOMMUFD_DRIVER_CORE */ + +/* + * Helpers for IOMMU driver to allocate driver structures that will be freed by + * the iommufd core. The free op will be called prior to freeing the memory. + */ +#define iommufd_viommu_alloc(ictx, drv_struct, member, viommu_ops) \ + ({ \ + drv_struct *ret; \ + \ + static_assert(__same_type(struct iommufd_viommu, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member.obj) == 0); \ + ret = (drv_struct *)_iommufd_object_alloc( \ + ictx, sizeof(drv_struct), IOMMUFD_OBJ_VIOMMU); \ + if (!IS_ERR(ret)) \ + ret->member.ops = viommu_ops; \ + ret; \ + }) #endif diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index 19a7b00baff4..91324c331a4b 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -19,19 +19,19 @@ * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @sleep_before_read: if it is true, sleep @sleep_us before read. * @args: arguments for @op poll * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. */ #define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \ sleep_before_read, args...) \ @@ -64,22 +64,22 @@ * @op: accessor function (takes @args as its arguments) * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Should - * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @delay_before_read: if it is true, delay @delay_us before read. * @args: arguments for @op poll * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @args is stored in @val. - * * This macro does not rely on timekeeping. Hence it is safe to call even when * timekeeping is suspended, at the expense of an underestimation of wall clock * time, which is rather minimal with a non-zero delay_us. * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @args is stored in @val. */ #define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \ delay_before_read, args...) \ @@ -119,17 +119,17 @@ * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. Must not - * be called from atomic context if sleep_us or timeout_us are used. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr) @@ -140,16 +140,16 @@ * @addr: Address to poll * @val: Variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). Should - * be less than ~10us since udelay is used (see - * Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either - * case, the last read value at @addr is stored in @val. - * * When available, you'll probably want to use one of the specialized * macros defined below rather than this macro directly. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. */ #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6e9fb667a1c5..5385349f0b8a 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -249,6 +249,38 @@ struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size); resource_size_t resource_alignment(struct resource *res); + +/** + * resource_set_size - Calculate resource end address from size and start + * @res: Resource descriptor + * @size: Size of the resource + * + * Calculate the end address for @res based on @size. + * + * Note: The start address of @res must be set when calling this function. + * Prefer resource_set_range() if setting both the start address and @size. + */ +static inline void resource_set_size(struct resource *res, resource_size_t size) +{ + res->end = res->start + size - 1; +} + +/** + * resource_set_range - Set resource start and end addresses + * @res: Resource descriptor + * @start: Start address for the resource + * @size: Size of the resource + * + * Set @res start address and calculate the end address based on @size. + */ +static inline void resource_set_range(struct resource *res, + resource_size_t start, + resource_size_t size) +{ + res->start = start; + resource_set_size(res, size); +} + static inline resource_size_t resource_size(const struct resource *res) { return res->end - res->start + 1; diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index db1249cd9692..b25377b6ea98 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task_is_realtime(task)) + else if (rt_or_dl_task_policy(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; diff --git a/include/linux/ioremap.h b/include/linux/ioremap.h index f0e99fc7dd8b..2bd1661fe9ad 100644 --- a/include/linux/ioremap.h +++ b/include/linux/ioremap.h @@ -4,6 +4,7 @@ #include <linux/kasan.h> #include <asm/pgtable.h> +#include <asm/vmalloc.h> #if defined(CONFIG_HAS_IOMEM) || defined(CONFIG_GENERIC_IOREMAP) /* diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index 270454a6703d..c4aa58032faf 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -10,6 +10,7 @@ #include <linux/uio.h> #include <linux/bvec.h> +#include <linux/folio_queue.h> typedef size_t (*iov_step_f)(void *iter_base, size_t progress, size_t len, void *priv, void *priv2); @@ -141,6 +142,60 @@ size_t iterate_bvec(struct iov_iter *iter, size_t len, void *priv, void *priv2, } /* + * Handle ITER_FOLIOQ. + */ +static __always_inline +size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_step_f step) +{ + const struct folio_queue *folioq = iter->folioq; + unsigned int slot = iter->folioq_slot; + size_t progress = 0, skip = iter->iov_offset; + + if (slot == folioq_nr_slots(folioq)) { + /* The iterator may have been extended. */ + folioq = folioq->next; + slot = 0; + } + + do { + struct folio *folio = folioq_folio(folioq, slot); + size_t part, remain, consumed; + size_t fsize; + void *base; + + if (!folio) + break; + + fsize = folioq_folio_size(folioq, slot); + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + if (skip >= fsize) { + skip = 0; + slot++; + if (slot == folioq_nr_slots(folioq) && folioq->next) { + folioq = folioq->next; + slot = 0; + } + } + if (remain) + break; + } while (len); + + iter->folioq_slot = slot; + iter->folioq = folioq; + iter->iov_offset = skip; + iter->count -= progress; + return progress; +} + +/* * Handle ITER_XARRAY. */ static __always_inline @@ -249,6 +304,8 @@ size_t iterate_and_advance2(struct iov_iter *iter, size_t len, void *priv, return iterate_bvec(iter, len, priv, priv2, step); if (iov_iter_is_kvec(iter)) return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_folioq(iter)) + return iterate_folioq(iter, len, priv, priv2, step); if (iov_iter_is_xarray(iter)) return iterate_xarray(iter, len, priv, priv2, step); return iterate_discard(iter, len, priv, priv2, step); @@ -271,4 +328,51 @@ size_t iterate_and_advance(struct iov_iter *iter, size_t len, void *priv, return iterate_and_advance2(iter, len, priv, NULL, ustep, step); } +/** + * iterate_and_advance_kernel - Iterate over a kernel-internal iterator + * @iter: The iterator to iterate over. + * @len: The amount to iterate over. + * @priv: Data for the step functions. + * @priv2: More data for the step functions. + * @step: Function for other iterators; given kernel addresses. + * + * Iterate over the next part of an iterator, up to the specified length. The + * buffer is presented in segments, which for kernel iteration are broken up by + * physical pages and mapped, with the mapped address being presented. + * + * [!] Note This will only handle BVEC, KVEC, FOLIOQ, XARRAY and DISCARD-type + * iterators; it will not handle UBUF or IOVEC-type iterators. + * + * A step functions, @step, must be provided, one for handling mapped kernel + * addresses and the other is given user addresses which have the potential to + * fault since no pinning is performed. + * + * The step functions are passed the address and length of the segment, @priv, + * @priv2 and the amount of data so far iterated over (which can, for example, + * be added to @priv to point to the right part of a second buffer). The step + * functions should return the amount of the segment they didn't process (ie. 0 + * indicates complete processsing). + * + * This function returns the amount of data processed (ie. 0 means nothing was + * processed and the value of @len means processes to completion). + */ +static __always_inline +size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv, + void *priv2, iov_step_f step) +{ + if (unlikely(iter->count < len)) + len = iter->count; + if (unlikely(!len)) + return 0; + if (iov_iter_is_bvec(iter)) + return iterate_bvec(iter, len, priv, priv2, step); + if (iov_iter_is_kvec(iter)) + return iterate_kvec(iter, len, priv, priv2, step); + if (iov_iter_is_folioq(iter)) + return iterate_folioq(iter, len, priv, priv2, step); + if (iov_iter_is_xarray(iter)) + return iterate_xarray(iter, len, priv, priv2, step); + return iterate_discard(iter, len, priv, priv2, step); +} + #endif /* _LINUX_IOV_ITER_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 383a0ea2ab91..a6e2aadbb91b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -89,6 +89,7 @@ struct ipv6_devconf { __u8 ioam6_enabled; __u8 ndisc_evict_nocarrier; __u8 ra_honor_pio_life; + __u8 ra_honor_pio_pflag; struct ctl_table_header *sysctl_header; }; diff --git a/include/linux/irq.h b/include/linux/irq.h index 1f5dbf1f92c9..fa711f80957b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -991,7 +991,6 @@ void irq_init_desc(unsigned int irq); * @ack: Ack register offset to reg_base * @eoi: Eoi register offset to reg_base * @type: Type configuration register offset to reg_base - * @polarity: Polarity configuration register offset to reg_base */ struct irq_chip_regs { unsigned long enable; @@ -1000,7 +999,6 @@ struct irq_chip_regs { unsigned long ack; unsigned long eoi; unsigned long type; - unsigned long polarity; }; /** @@ -1040,8 +1038,6 @@ struct irq_chip_type { * @irq_base: Interrupt base nr for this chip * @irq_cnt: Number of interrupts handled by this chip * @mask_cache: Cached mask register shared between all chip types - * @type_cache: Cached type register - * @polarity_cache: Cached polarity register * @wake_enabled: Interrupt can wakeup from suspend * @wake_active: Interrupt is marked as an wakeup from suspend source * @num_ct: Number of available irq_chip_type instances (usually 1) @@ -1068,8 +1064,6 @@ struct irq_chip_generic { unsigned int irq_base; unsigned int irq_cnt; u32 mask_cache; - u32 type_cache; - u32 polarity_cache; u32 wake_enabled; u32 wake_active; unsigned int num_ct; diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index ecabed6d3307..7f1f11a5e4e4 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -66,10 +66,12 @@ struct its_vpe { bool enabled; bool group; } sgi_config[16]; - atomic_t vmapp_count; }; }; + /* Track the VPE being mapped */ + atomic_t vmapp_count; + /* * Ensures mutual exclusion between affinity setting of the * vPE and vLPI operations using vpe->col_idx. diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h index faf0b800b1b0..7494952c5518 100644 --- a/include/linux/irqchip/riscv-imsic.h +++ b/include/linux/irqchip/riscv-imsic.h @@ -8,6 +8,8 @@ #include <linux/types.h> #include <linux/bitops.h> +#include <linux/device.h> +#include <linux/fwnode.h> #include <asm/csr.h> #define IMSIC_MMIO_PAGE_SHIFT 12 @@ -84,4 +86,11 @@ static inline const struct imsic_global_config *imsic_get_global_config(void) #endif +#ifdef CONFIG_ACPI +int imsic_platform_acpi_probe(struct fwnode_handle *fwnode); +struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev); +#else +static inline struct fwnode_handle *imsic_acpi_get_fwnode(struct device *dev) { return NULL; } +#endif + #endif diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index de6105f68fec..e432b6a12a32 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -291,7 +291,12 @@ struct irq_domain_chip_generic_info; * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; * Use ~0 for no limit; 0 for no direct mapping + * @hwirq_base: The first hardware interrupt number (legacy domains only) + * @virq_base: The first Linux interrupt number for legacy domains to + * immediately associate the interrupts after domain creation * @bus_token: Domain bus token + * @name_suffix: Optional name suffix to avoid collisions when multiple + * domains are added using same fwnode * @ops: Domain operation callbacks * @host_data: Controller private data pointer * @dgc_info: Geneneric chip information structure pointer used to @@ -307,7 +312,10 @@ struct irq_domain_info { unsigned int size; irq_hw_number_t hwirq_max; int direct_max; + unsigned int hwirq_base; + unsigned int virq_base; enum irq_domain_bus_token bus_token; + const char *name_suffix; const struct irq_domain_ops *ops; void *host_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3f003d5fde53..57b074e0cfbb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -18,6 +18,8 @@ #include <asm/irqflags.h> #include <asm/percpu.h> +struct task_struct; + /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); @@ -25,12 +27,16 @@ extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); + extern void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } + static inline void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle) {} #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 3496baa0b07f..e97206c721a0 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -5,30 +5,36 @@ #include <uapi/linux/irqnr.h> -extern int nr_irqs; +unsigned int irq_get_nr_irqs(void) __pure; +unsigned int irq_set_nr_irqs(unsigned int nr); extern struct irq_desc *irq_to_desc(unsigned int irq); unsigned int irq_get_next_irq(unsigned int offset); -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ - irq++, desc = irq_to_desc(irq)) \ - if (!desc) \ - ; \ - else - +#define for_each_irq_desc(irq, desc) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = 0, desc = irq_to_desc(irq); irq < __nr_irqs__; \ + irq++, desc = irq_to_desc(irq)) \ + if (!desc) \ + ; \ + else # define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ - irq--, desc = irq_to_desc(irq)) \ + for (irq = irq_get_nr_irqs() - 1, desc = irq_to_desc(irq); \ + irq >= 0; irq--, desc = irq_to_desc(irq)) \ if (!desc) \ ; \ else -# define for_each_active_irq(irq) \ - for (irq = irq_get_next_irq(0); irq < nr_irqs; \ - irq = irq_get_next_irq(irq + 1)) +#define for_each_active_irq(irq) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = irq_get_next_irq(0); irq < __nr_irqs__; \ + irq = irq_get_next_irq(irq + 1)) -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) +#define for_each_irq_nr(irq) \ + for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \ + __nr_irqs__ = 0) \ + for (irq = 0; irq < __nr_irqs__; irq++) #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5157d92b6f23..50f7ea8714bf 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1086,7 +1086,7 @@ struct journal_s int j_revoke_records_per_block; /** - * @j_transaction_overhead: + * @j_transaction_overhead_buffers: * * Number of blocks each transaction needs for its own bookkeeping */ @@ -1675,7 +1675,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); -int jbd2_fc_release_bufs(journal_t *journal); +void jbd2_fc_release_bufs(journal_t *journal); /* * is_journal_abort @@ -1796,22 +1796,21 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[JBD_MAX_CHECKSUM_SIZE]; - } desc; + DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx, + DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE, + sizeof(*((struct shash_desc *)0)->__ctx))); int err; BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > JBD_MAX_CHECKSUM_SIZE); - desc.shash.tfm = journal->j_chksum_driver; - *(u32 *)desc.ctx = crc; + desc->tfm = journal->j_chksum_driver; + *(u32 *)desc->__ctx = crc; - err = crypto_shash_update(&desc.shash, address, length); + err = crypto_shash_update(desc, address, length); BUG_ON(err); - return *(u32 *)desc.ctx; + return *(u32 *)desc->__ctx; } /* Return most recent uncommitted transaction */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index d9f1435a5a13..ed945f42e064 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -418,7 +418,7 @@ extern unsigned long preset_lpj; #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ TICK_NSEC -1) / (u64)TICK_NSEC)) /* - * The maximum jiffie value is (MAX_INT >> 1). Here we translate that + * The maximum jiffy value is (MAX_INT >> 1). Here we translate that * into seconds. The 64-bit case will overflow if we are not careful, * so use the messy SH_DIV macro to do it. Still all constants. */ @@ -502,7 +502,7 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * - all other values are converted to jiffies by either multiplying * the input value by a factor or dividing it with a factor and * handling any 32-bit overflows. - * for the details see __msecs_to_jiffies() + * for the details see _msecs_to_jiffies() * * msecs_to_jiffies() checks for the passed in value being a constant * via __builtin_constant_p() allowing gcc to eliminate most of the @@ -526,6 +526,19 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) } } +/** + * secs_to_jiffies: - convert seconds to jiffies + * @_secs: time in seconds + * + * Conversion is done by simple multiplication with HZ + * + * secs_to_jiffies() is defined as a macro rather than a static inline + * function so it can be used in static initializers. + * + * Return: jiffies value + */ +#define secs_to_jiffies(_secs) ((_secs) * HZ) + extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) static inline unsigned long _usecs_to_jiffies(const unsigned int u) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 70d6a8f6e25d..6bbfc8aa42e8 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -29,6 +29,9 @@ typedef unsigned int __bitwise kasan_vmalloc_flags_t; #define KASAN_VMALLOC_VM_ALLOC ((__force kasan_vmalloc_flags_t)0x02u) #define KASAN_VMALLOC_PROT_NORMAL ((__force kasan_vmalloc_flags_t)0x04u) +#define KASAN_VMALLOC_PAGE_RANGE 0x1 /* Apply exsiting page range */ +#define KASAN_VMALLOC_TLB_FLUSH 0x2 /* TLB flush */ + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #include <linux/pgtable.h> @@ -175,13 +178,59 @@ static __always_inline void * __must_check kasan_init_slab_obj( return (void *)object; } -bool __kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip, bool init); +bool __kasan_slab_pre_free(struct kmem_cache *s, void *object, + unsigned long ip); +/** + * kasan_slab_pre_free - Check whether freeing a slab object is safe. + * @object: Object to be freed. + * + * This function checks whether freeing the given object is safe. It may + * check for double-free and invalid-free bugs and report them. + * + * This function is intended only for use by the slab allocator. + * + * @Return true if freeing the object is unsafe; false otherwise. + */ +static __always_inline bool kasan_slab_pre_free(struct kmem_cache *s, + void *object) +{ + if (kasan_enabled()) + return __kasan_slab_pre_free(s, object, _RET_IP_); + return false; +} + +bool __kasan_slab_free(struct kmem_cache *s, void *object, bool init, + bool still_accessible); +/** + * kasan_slab_free - Poison, initialize, and quarantine a slab object. + * @object: Object to be freed. + * @init: Whether to initialize the object. + * @still_accessible: Whether the object contents are still accessible. + * + * This function informs that a slab object has been freed and is not + * supposed to be accessed anymore, except when @still_accessible is set + * (indicating that the object is in a SLAB_TYPESAFE_BY_RCU cache and an RCU + * grace period might not have passed yet). + * + * For KASAN modes that have integrated memory initialization + * (kasan_has_integrated_init() == true), this function also initializes + * the object's memory. For other modes, the @init argument is ignored. + * + * This function might also take ownership of the object to quarantine it. + * When this happens, KASAN will defer freeing the object to a later + * stage and handle it internally until then. The return value indicates + * whether KASAN took ownership of the object. + * + * This function is intended only for use by the slab allocator. + * + * @Return true if KASAN took ownership of the object; false otherwise. + */ static __always_inline bool kasan_slab_free(struct kmem_cache *s, - void *object, bool init) + void *object, bool init, + bool still_accessible) { if (kasan_enabled()) - return __kasan_slab_free(s, object, _RET_IP_, init); + return __kasan_slab_free(s, object, init, still_accessible); return false; } @@ -371,7 +420,14 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, bool init) + +static inline bool kasan_slab_pre_free(struct kmem_cache *s, void *object) +{ + return false; +} + +static inline bool kasan_slab_free(struct kmem_cache *s, void *object, + bool init, bool still_accessible) { return false; } @@ -511,7 +567,8 @@ void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); int kasan_populate_vmalloc(unsigned long addr, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end); + unsigned long free_region_end, + unsigned long flags); #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ @@ -526,7 +583,8 @@ static inline int kasan_populate_vmalloc(unsigned long start, static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) { } + unsigned long free_region_end, + unsigned long flags) { } #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ @@ -561,7 +619,8 @@ static inline int kasan_populate_vmalloc(unsigned long start, static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) { } + unsigned long free_region_end, + unsigned long flags) { } static inline void *kasan_unpoison_vmalloc(const void *start, unsigned long size, diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h index 859f4b0c1b2b..196778a087c4 100644 --- a/include/linux/kernel-page-flags.h +++ b/include/linux/kernel-page-flags.h @@ -10,12 +10,11 @@ */ #define KPF_RESERVED 32 #define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 +#define KPF_OWNER_2 34 #define KPF_PRIVATE 35 #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 #define KPF_ARCH 38 -#define KPF_UNCACHED 39 #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 #define KPF_ARCH_3 42 diff --git a/include/linux/key.h b/include/linux/key.h index 943a432da3ae..074dca3222b9 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -436,9 +436,6 @@ extern key_ref_t keyring_search(key_ref_t keyring, const char *description, bool recurse); -extern int keyring_add_key(struct key *keyring, - struct key *key); - extern int keyring_restrict(key_ref_t keyring, const char *type, const char *restriction); diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 88100cc9caba..0ad1ddbb8b99 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -124,7 +124,7 @@ static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp if (!static_branch_likely(&kfence_allocation_key)) return NULL; #endif - if (likely(atomic_read(&kfence_allocation_gate))) + if (likely(atomic_read(&kfence_allocation_gate) > 0)) return NULL; return __kfence_alloc(s, size, flags); } diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 564868bdce89..fd743d4c4b4b 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -37,7 +37,6 @@ */ #include <linux/array_size.h> -#include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/types.h> diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index f68865e19b0b..1f46046080f5 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -2,8 +2,7 @@ #ifndef _LINUX_KHUGEPAGED_H #define _LINUX_KHUGEPAGED_H -#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */ - +extern unsigned int khugepaged_max_ptes_none __read_mostly; #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern struct attribute_group khugepaged_attr_group; diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 6a3cd1bf4680..93a73c076d16 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -26,6 +26,7 @@ extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_free_percpu(const void __percpu *ptr) __ref; extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; +extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; @@ -93,6 +94,9 @@ static inline void kmemleak_update_trace(const void *ptr) static inline void kmemleak_not_leak(const void *ptr) { } +static inline void kmemleak_transient_leak(const void *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 906521c2329c..6055fc969877 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -40,6 +40,17 @@ struct kmsg_dump_iter { }; /** + * struct kmsg_dump_detail - kernel crash detail + * @reason: reason for the crash, see kmsg_dump_reason. + * @description: optional short string, to provide additional information. + */ + +struct kmsg_dump_detail { + enum kmsg_dump_reason reason; + const char *description; +}; + +/** * struct kmsg_dumper - kernel crash message dumper structure * @list: Entry in the dumper list (private) * @dump: Call into dumping code which will retrieve the data with @@ -49,13 +60,13 @@ struct kmsg_dump_iter { */ struct kmsg_dumper { struct list_head list; - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + void (*dump)(struct kmsg_dumper *dumper, struct kmsg_dump_detail *detail); enum kmsg_dump_reason max_reason; bool registered; }; #ifdef CONFIG_PRINTK -void kmsg_dump(enum kmsg_dump_reason reason); +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc); bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); @@ -71,7 +82,7 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper); const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason); #else -static inline void kmsg_dump(enum kmsg_dump_reason reason) +static inline void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { } @@ -107,4 +118,9 @@ static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) } #endif +static inline void kmsg_dump(enum kmsg_dump_reason reason) +{ + kmsg_dump_desc(reason, NULL); +} + #endif /* _LINUX_KMSG_DUMP_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5fcbc254d186..8c4f3bb24429 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -269,15 +269,6 @@ extern unsigned long __stop_kprobe_blacklist[]; extern struct kretprobe_blackpoint kretprobe_blacklist[]; -#ifdef CONFIG_KPROBES_SANITY_TEST -extern int init_test_probes(void); -#else /* !CONFIG_KPROBES_SANITY_TEST */ -static inline int init_test_probes(void) -{ - return 0; -} -#endif /* CONFIG_KPROBES_SANITY_TEST */ - extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 11690dacd986..6a53ac4885bb 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -13,7 +13,6 @@ #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/sched.h> -#include <linux/sched/coredump.h> #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, @@ -54,12 +53,11 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm) return atomic_long_read(&mm->ksm_zero_pages); } -static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { + /* Adding mm to ksm is best effort on fork. */ if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) - return __ksm_enter(mm); - - return 0; + __ksm_enter(mm); } static inline int ksm_execve(struct mm_struct *mm) @@ -92,7 +90,7 @@ struct folio *ksm_might_need_to_copy(struct folio *folio, void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); -void collect_procs_ksm(struct folio *folio, struct page *page, +void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early); long ksm_process_profit(struct mm_struct *); @@ -107,9 +105,8 @@ static inline int ksm_disable(struct mm_struct *mm) return 0; } -static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { - return 0; } static inline int ksm_execve(struct mm_struct *mm) @@ -125,8 +122,9 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { } -static inline void collect_procs_ksm(struct folio *folio, struct page *page, - struct list_head *to_kill, int force_early) +static inline void collect_procs_ksm(const struct folio *folio, + const struct page *page, struct list_head *to_kill, + int force_early) { } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b23c6d48392f..401439bb21e3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -97,6 +97,7 @@ #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) +#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4) /* * error pfns indicate that the gfn is in slot but faild to @@ -153,13 +154,6 @@ static inline bool kvm_is_error_gpa(gpa_t gpa) return gpa == INVALID_GPA; } -#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) - -static inline bool is_error_page(struct page *page) -{ - return IS_ERR(page); -} - #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) @@ -219,6 +213,7 @@ enum kvm_bus { KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, + KVM_IOCSR_BUS, KVM_NR_BUSES }; @@ -279,21 +274,19 @@ enum { READING_SHADOW_PAGE_TABLES, }; -#define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA) - struct kvm_host_map { /* * Only valid if the 'pfn' is managed by the host kernel (i.e. There is * a 'struct page' for it. When using mem= kernel parameter some memory * can be used as guest memory but they are not managed by host * kernel). - * If 'pfn' is not managed by the host kernel, this field is - * initialized to KVM_UNMAPPED_PAGE. */ + struct page *pinned_page; struct page *page; void *hva; kvm_pfn_t pfn; kvm_pfn_t gfn; + bool writable; }; /* @@ -342,7 +335,8 @@ struct kvm_vcpu { #ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; #endif - struct pid __rcu *pid; + struct pid *pid; + rwlock_t pid_lock; int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; @@ -485,7 +479,15 @@ static __always_inline void guest_state_enter_irqoff(void) */ static __always_inline void guest_context_exit_irqoff(void) { - context_tracking_guest_exit(); + /* + * Guest mode is treated as a quiescent state, see + * guest_context_enter_irqoff() for more details. + */ + if (!context_tracking_guest_exit()) { + instrumentation_begin(); + rcu_virt_note_context_switch(); + instrumentation_end(); + } } /* @@ -1168,6 +1170,10 @@ static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_ kvm_memslot_iter_is_valid(iter, end); \ kvm_memslot_iter_next(iter)) +struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); +struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); + /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot @@ -1206,33 +1212,70 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm); void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); -int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, - struct page **pages, int nr_pages); +int kvm_prefetch_pages(struct kvm_memory_slot *slot, gfn_t gfn, + struct page **pages, int nr_pages); + +struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn, bool write); +static inline struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) +{ + return __gfn_to_page(kvm, gfn, true); +} -struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); + +static inline void kvm_release_page_unused(struct page *page) +{ + if (!page) + return; + + put_page(page); +} + void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, - bool *writable); -kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); -kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, - bool atomic, bool interruptible, bool *async, - bool write_fault, bool *writable, hva_t *hva); - -void kvm_release_pfn_clean(kvm_pfn_t pfn); -void kvm_release_pfn_dirty(kvm_pfn_t pfn); -void kvm_set_pfn_dirty(kvm_pfn_t pfn); -void kvm_set_pfn_accessed(kvm_pfn_t pfn); - -void kvm_release_pfn(kvm_pfn_t pfn, bool dirty); +static inline void kvm_release_faultin_page(struct kvm *kvm, struct page *page, + bool unused, bool dirty) +{ + lockdep_assert_once(lockdep_is_held(&kvm->mmu_lock) || unused); + + if (!page) + return; + + /* + * If the page that KVM got from the *primary MMU* is writable, and KVM + * installed or reused a SPTE, mark the page/folio dirty. Note, this + * may mark a folio dirty even if KVM created a read-only SPTE, e.g. if + * the GFN is write-protected. Folios can't be safely marked dirty + * outside of mmu_lock as doing so could race with writeback on the + * folio. As a result, KVM can't mark folios dirty in the fast page + * fault handler, and so KVM must (somewhat) speculatively mark the + * folio dirty if KVM could locklessly make the SPTE writable. + */ + if (unused) + kvm_release_page_unused(page); + else if (dirty) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); +} + +kvm_pfn_t __kvm_faultin_pfn(const struct kvm_memory_slot *slot, gfn_t gfn, + unsigned int foll, bool *writable, + struct page **refcounted_page); + +static inline kvm_pfn_t kvm_faultin_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, + bool write, bool *writable, + struct page **refcounted_page) +{ + return __kvm_faultin_pfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, + write ? FOLL_WRITE : 0, writable, refcounted_page); +} + int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); @@ -1296,19 +1339,28 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, }) int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); -struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); -struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); -struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); -kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); -kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); -void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); +int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, + bool writable); +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map); + +static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, + struct kvm_host_map *map) +{ + return __kvm_vcpu_map(vcpu, gpa, map, true); +} + +static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, + struct kvm_host_map *map) +{ + return __kvm_vcpu_map(vcpu, gpa, map, false); +} + unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, @@ -1521,8 +1573,22 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING -int kvm_arch_hardware_enable(void); -void kvm_arch_hardware_disable(void); +/* + * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under + * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of + * kvm_usage_count, i.e. at the beginning of the generic hardware enabling + * sequence, and at the end of the generic hardware disabling sequence. + */ +void kvm_arch_enable_virtualization(void); +void kvm_arch_disable_virtualization(void); +/* + * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to + * do the actual twiddling of hardware bits. The hooks are called on all + * online CPUs when KVM enables/disabled virtualization, and on a single CPU + * when that CPU is onlined/offlined (including for Resume/Suspend). + */ +int kvm_arch_enable_virtualization_cpu(void); +void kvm_arch_disable_virtualization_cpu(void); #endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); @@ -1666,9 +1732,6 @@ void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); -struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn); -bool kvm_is_zone_device_page(struct page *page); - struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; @@ -2362,12 +2425,6 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ -typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); - -int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, - uintptr_t data, const char *name, - struct task_struct **thread_ptr); - #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { @@ -2441,11 +2498,13 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) #ifdef CONFIG_KVM_PRIVATE_MEM int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order); + gfn_t gfn, kvm_pfn_t *pfn, struct page **page, + int *max_order); #else static inline int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, - kvm_pfn_t *pfn, int *max_order) + kvm_pfn_t *pfn, struct page **page, + int *max_order) { KVM_BUG_ON(1, kvm); return -EIO; diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 68703a51dc53..c3ccdff4519a 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -12,7 +12,11 @@ #include <linux/device.h> #include <linux/mutex.h> #include <linux/notifier.h> -#include <linux/fb.h> + +#define LCD_POWER_ON (0) +#define LCD_POWER_REDUCED (1) // deprecated; don't use in new code +#define LCD_POWER_REDUCED_VSYNC_SUSPEND (2) // deprecated; don't use in new code +#define LCD_POWER_OFF (4) /* Notes on locking: * @@ -30,7 +34,6 @@ */ struct lcd_device; -struct fb_info; struct lcd_properties { /* The maximum value for contrast (read-only) */ @@ -47,11 +50,23 @@ struct lcd_ops { int (*get_contrast)(struct lcd_device *); /* Set LCD panel contrast */ int (*set_contrast)(struct lcd_device *, int contrast); - /* Set LCD panel mode (resolutions ...) */ - int (*set_mode)(struct lcd_device *, struct fb_videomode *); - /* Check if given framebuffer device is the one LCD is bound to; - return 0 if not, !=0 if it is. If NULL, lcd always matches the fb. */ - int (*check_fb)(struct lcd_device *, struct fb_info *); + + /* + * Set LCD panel mode (resolutions ...) + */ + int (*set_mode)(struct lcd_device *lcd, u32 xres, u32 yres); + + /* + * Check if the LCD controls the given display device. This + * operation is optional and if not implemented it is assumed that + * the display is always the one controlled by the LCD. + * + * RETURNS: + * + * If display_dev is NULL or display_dev matches the device controlled by + * the LCD, return true. Otherwise return false. + */ + bool (*controls_device)(struct lcd_device *lcd, struct device *display_device); }; struct lcd_device { diff --git a/include/linux/leds.h b/include/linux/leds.h index 6885603f211b..98f9719c924c 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -171,6 +171,7 @@ struct led_classdev { int new_blink_brightness; void (*flash_resume)(struct led_classdev *led_cdev); + struct workqueue_struct *wq; /* LED workqueue */ struct work_struct set_brightness_work; int delayed_set_value; unsigned long delayed_delay_on; @@ -238,7 +239,7 @@ struct led_classdev { struct kernfs_node *brightness_hw_changed_kn; #endif - /* Ensures consistent access to the LED Flash Class device */ + /* Ensures consistent access to the LED class device */ struct mutex led_access; }; @@ -611,6 +612,8 @@ enum led_trigger_netdev_modes { TRIGGER_NETDEV_FULL_DUPLEX, TRIGGER_NETDEV_TX, TRIGGER_NETDEV_RX, + TRIGGER_NETDEV_TX_ERR, + TRIGGER_NETDEV_RX_ERR, /* Keep last */ __TRIGGER_NETDEV_MAX, diff --git a/include/linux/libata.h b/include/linux/libata.h index 17394098bee9..c1a85d46eba6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -55,6 +55,46 @@ /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU +/* + * Quirk flags bits. + * ata_device->quirks is an unsigned int, so __ATA_QUIRK_MAX must not exceed 32. + */ +enum ata_quirks { + __ATA_QUIRK_DIAGNOSTIC, /* Failed boot diag */ + __ATA_QUIRK_NODMA, /* DMA problems */ + __ATA_QUIRK_NONCQ, /* Don't use NCQ */ + __ATA_QUIRK_MAX_SEC_128, /* Limit max sects to 128 */ + __ATA_QUIRK_BROKEN_HPA, /* Broken HPA */ + __ATA_QUIRK_DISABLE, /* Disable it */ + __ATA_QUIRK_HPA_SIZE, /* Native size off by one */ + __ATA_QUIRK_IVB, /* cbl det validity bit bugs */ + __ATA_QUIRK_STUCK_ERR, /* Stuck ERR on next PACKET */ + __ATA_QUIRK_BRIDGE_OK, /* No bridge limits */ + __ATA_QUIRK_ATAPI_MOD16_DMA, /* Use ATAPI DMA for commands that */ + /* are not a multiple of 16 bytes */ + __ATA_QUIRK_FIRMWARE_WARN, /* Firmware update warning */ + __ATA_QUIRK_1_5_GBPS, /* Force 1.5 Gbps */ + __ATA_QUIRK_NOSETXFER, /* Skip SETXFER, SATA only */ + __ATA_QUIRK_BROKEN_FPDMA_AA, /* Skip AA */ + __ATA_QUIRK_DUMP_ID, /* Dump IDENTIFY data */ + __ATA_QUIRK_MAX_SEC_LBA48, /* Set max sects to 65535 */ + __ATA_QUIRK_ATAPI_DMADIR, /* Device requires dmadir */ + __ATA_QUIRK_NO_NCQ_TRIM, /* Do not use queued TRIM */ + __ATA_QUIRK_NOLPM, /* Do not use LPM */ + __ATA_QUIRK_WD_BROKEN_LPM, /* Some WDs have broken LPM */ + __ATA_QUIRK_ZERO_AFTER_TRIM, /* Guarantees zero after trim */ + __ATA_QUIRK_NO_DMA_LOG, /* Do not use DMA for log read */ + __ATA_QUIRK_NOTRIM, /* Do not use TRIM */ + __ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */ + __ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */ + __ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */ + __ATA_QUIRK_NO_ID_DEV_LOG, /* Identify device log missing */ + __ATA_QUIRK_NO_LOG_DIR, /* Do not read log directory */ + __ATA_QUIRK_NO_FUA, /* Do not use FUA */ + + __ATA_QUIRK_MAX, +}; + enum { /* various global constants */ LIBATA_MAX_PRD = ATA_MAX_PRD / 2, @@ -230,9 +270,7 @@ enum { /* bits 24:31 of host->flags are reserved for LLD specific flags */ - /* various lengths of time */ - ATA_TMOUT_BOOT = 30000, /* heuristic */ - ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ + /* Various lengths of time */ ATA_TMOUT_INTERNAL_QUICK = 5000, ATA_TMOUT_MAX_PARK = 30000, @@ -338,6 +376,7 @@ enum { ATA_EHI_PRINTINFO = (1 << 18), /* print configuration info */ ATA_EHI_SETMODE = (1 << 19), /* configure transfer mode */ ATA_EHI_POST_SETMODE = (1 << 20), /* revalidating after setmode */ + ATA_EHI_DID_PRINT_QUIRKS = (1 << 21), /* already printed quirks info */ ATA_EHI_DID_RESET = ATA_EHI_DID_SOFTRESET | ATA_EHI_DID_HARDRESET, @@ -362,43 +401,42 @@ enum { */ ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 8, - /* Horkage types. May be set by libata or controller on drives - (some horkage may be drive/controller pair dependent */ - - ATA_HORKAGE_DIAGNOSTIC = (1 << 0), /* Failed boot diag */ - ATA_HORKAGE_NODMA = (1 << 1), /* DMA problems */ - ATA_HORKAGE_NONCQ = (1 << 2), /* Don't use NCQ */ - ATA_HORKAGE_MAX_SEC_128 = (1 << 3), /* Limit max sects to 128 */ - ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ - ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ - ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ - ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ - ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ - ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands - not multiple of 16 bytes */ - ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ - ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ - ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ - ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ - ATA_HORKAGE_DUMP_ID = (1 << 16), /* dump IDENTIFY data */ - ATA_HORKAGE_MAX_SEC_LBA48 = (1 << 17), /* Set max sects to 65535 */ - ATA_HORKAGE_ATAPI_DMADIR = (1 << 18), /* device requires dmadir */ - ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ - ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ - ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ - ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ - ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ - ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ - ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ - ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ - ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ - ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ - ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ - ATA_HORKAGE_NO_FUA = (1 << 30), /* Do not use FUA */ - - /* DMA mask for user DMA control: User visible values; DO NOT - renumber */ + /* + * Quirk flags: may be set by libata or controller drivers on drives. + * Some quirks may be drive/controller pair dependent. + */ + ATA_QUIRK_DIAGNOSTIC = (1U << __ATA_QUIRK_DIAGNOSTIC), + ATA_QUIRK_NODMA = (1U << __ATA_QUIRK_NODMA), + ATA_QUIRK_NONCQ = (1U << __ATA_QUIRK_NONCQ), + ATA_QUIRK_MAX_SEC_128 = (1U << __ATA_QUIRK_MAX_SEC_128), + ATA_QUIRK_BROKEN_HPA = (1U << __ATA_QUIRK_BROKEN_HPA), + ATA_QUIRK_DISABLE = (1U << __ATA_QUIRK_DISABLE), + ATA_QUIRK_HPA_SIZE = (1U << __ATA_QUIRK_HPA_SIZE), + ATA_QUIRK_IVB = (1U << __ATA_QUIRK_IVB), + ATA_QUIRK_STUCK_ERR = (1U << __ATA_QUIRK_STUCK_ERR), + ATA_QUIRK_BRIDGE_OK = (1U << __ATA_QUIRK_BRIDGE_OK), + ATA_QUIRK_ATAPI_MOD16_DMA = (1U << __ATA_QUIRK_ATAPI_MOD16_DMA), + ATA_QUIRK_FIRMWARE_WARN = (1U << __ATA_QUIRK_FIRMWARE_WARN), + ATA_QUIRK_1_5_GBPS = (1U << __ATA_QUIRK_1_5_GBPS), + ATA_QUIRK_NOSETXFER = (1U << __ATA_QUIRK_NOSETXFER), + ATA_QUIRK_BROKEN_FPDMA_AA = (1U << __ATA_QUIRK_BROKEN_FPDMA_AA), + ATA_QUIRK_DUMP_ID = (1U << __ATA_QUIRK_DUMP_ID), + ATA_QUIRK_MAX_SEC_LBA48 = (1U << __ATA_QUIRK_MAX_SEC_LBA48), + ATA_QUIRK_ATAPI_DMADIR = (1U << __ATA_QUIRK_ATAPI_DMADIR), + ATA_QUIRK_NO_NCQ_TRIM = (1U << __ATA_QUIRK_NO_NCQ_TRIM), + ATA_QUIRK_NOLPM = (1U << __ATA_QUIRK_NOLPM), + ATA_QUIRK_WD_BROKEN_LPM = (1U << __ATA_QUIRK_WD_BROKEN_LPM), + ATA_QUIRK_ZERO_AFTER_TRIM = (1U << __ATA_QUIRK_ZERO_AFTER_TRIM), + ATA_QUIRK_NO_DMA_LOG = (1U << __ATA_QUIRK_NO_DMA_LOG), + ATA_QUIRK_NOTRIM = (1U << __ATA_QUIRK_NOTRIM), + ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), + ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), + ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), + ATA_QUIRK_NO_ID_DEV_LOG = (1U << __ATA_QUIRK_NO_ID_DEV_LOG), + ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR), + ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), + + /* User visible DMA mask for DMA control. DO NOT renumber. */ ATA_DMA_MASK_ATA = (1 << 0), /* DMA on ATA Disk */ ATA_DMA_MASK_ATAPI = (1 << 1), /* DMA on ATAPI */ ATA_DMA_MASK_CFA = (1 << 2), /* DMA on CF Card */ @@ -660,10 +698,25 @@ struct ata_cpr_log { struct ata_cpr cpr[] __counted_by(nr_cpr); }; +struct ata_cdl { + /* + * Buffer to cache the CDL log page 18h (command duration descriptors) + * for SCSI-ATA translation. + */ + u8 desc_log_buf[ATA_LOG_CDL_SIZE]; + + /* + * Buffer to handle reading the sense data for successful NCQ Commands + * log page for commands using a CDL with one of the limits policy set + * to 0xD (successful completion with sense data available bit set). + */ + u8 ncq_sense_log_buf[ATA_LOG_SENSE_NCQ_SIZE]; +}; + struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ - unsigned int horkage; /* List of broken features */ + unsigned int quirks; /* List of broken features */ unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ void *private_data; @@ -722,13 +775,16 @@ struct ata_device { /* Concurrent positioning ranges */ struct ata_cpr_log *cpr_log; - /* Command Duration Limits log support */ - u8 cdl[ATA_LOG_CDL_SIZE]; + /* Command Duration Limits support */ + struct ata_cdl *cdl; /* error history */ int spdn_cnt; /* ering is CLEAR_END, read comment above CLEAR_END */ struct ata_ering ering; + + /* For EH */ + u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* Fields between ATA_DEVICE_CLEAR_BEGIN and ATA_DEVICE_CLEAR_END are @@ -874,9 +930,6 @@ struct ata_port { #ifdef CONFIG_ATA_ACPI struct ata_acpi_gtm __acpi_init_gtm; /* use ata_acpi_init_gtm() */ #endif - /* owned by EH */ - u8 *ncq_sense_buf; - u8 sector_buf[ATA_SECT_SIZE] ____cacheline_aligned; }; /* The following initializer overrides a method to NULL whether one of @@ -1064,8 +1117,6 @@ static inline bool ata_port_is_frozen(const struct ata_port *ap) extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); -extern int sata_std_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); extern struct ata_host *ata_host_alloc(struct device *dev, int n_ports); @@ -1129,7 +1180,6 @@ extern int ata_xfer_mode2shift(u8 xfer_mode); extern const char *ata_mode_string(unsigned int xfer_mask); extern unsigned int ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); -extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc); extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg, unsigned int n_elem); extern unsigned int ata_dev_classify(const struct ata_taskfile *tf); @@ -1190,12 +1240,13 @@ extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); extern int sata_set_spd(struct ata_link *link); +int sata_std_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); extern int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); extern int sata_link_resume(struct ata_link *link, const unsigned int *params, unsigned long deadline); -extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link); extern void ata_eh_analyze_ncq_error(struct ata_link *link); #else static inline const unsigned int * @@ -1217,6 +1268,11 @@ static inline int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) return -EOPNOTSUPP; } static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; } +static inline int sata_std_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + return -EOPNOTSUPP; +} static inline int sata_link_hardreset(struct ata_link *link, const unsigned int *timing, unsigned long deadline, @@ -1233,10 +1289,6 @@ static inline int sata_link_resume(struct ata_link *link, { return -EOPNOTSUPP; } -static inline int ata_eh_read_sense_success_ncq_log(struct ata_link *link) -{ - return -EOPNOTSUPP; -} static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { } #endif extern int sata_link_debounce(struct ata_link *link, @@ -1967,7 +2019,6 @@ extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc, extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, unsigned int buflen, int rw); extern void ata_sff_irq_on(struct ata_port *ap); -extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); extern void ata_sff_queue_work(struct work_struct *work); diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index d94bfd9ac8cc..3b9de09871f6 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -37,8 +37,9 @@ static inline bool linkmode_empty(const unsigned long *src) return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2) +static inline bool linkmode_andnot(unsigned long *dst, + const unsigned long *src1, + const unsigned long *src2) { return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } diff --git a/include/linux/list.h b/include/linux/list.h index 5f4b0a39cf46..29a375889fb8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -687,14 +687,6 @@ static inline void list_splice_tail_init(struct list_head *list, for (pos = (head)->next; !list_is_head(pos, (head)); pos = pos->next) /** - * list_for_each_reverse - iterate backwards over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_reverse(pos, head) \ - for (pos = (head)->prev; pos != (head); pos = pos->prev) - -/** * list_for_each_rcu - Iterate over a list in an RCU-safe fashion * @pos: the &struct list_head to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 5099a8ccd5f4..05c166811f6b 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -32,6 +32,8 @@ struct list_lru_one { struct list_head list; /* may become negative during memcg reparenting */ long nr_items; + /* protects all fields above */ + spinlock_t lock; }; struct list_lru_memcg { @@ -41,11 +43,9 @@ struct list_lru_memcg { }; struct list_lru_node { - /* protects all lists on the node, including per cgroup */ - spinlock_t lock; /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; - long nr_items; + atomic_long_t nr_items; } ____cacheline_aligned_in_smp; struct list_lru { @@ -56,16 +56,28 @@ struct list_lru { bool memcg_aware; struct xarray xa; #endif +#ifdef CONFIG_LOCKDEP + struct lock_class_key *key; +#endif }; void list_lru_destroy(struct list_lru *lru); int __list_lru_init(struct list_lru *lru, bool memcg_aware, - struct lock_class_key *key, struct shrinker *shrinker); + struct shrinker *shrinker); #define list_lru_init(lru) \ - __list_lru_init((lru), false, NULL, NULL) + __list_lru_init((lru), false, NULL) #define list_lru_init_memcg(lru, shrinker) \ - __list_lru_init((lru), true, NULL, shrinker) + __list_lru_init((lru), true, shrinker) + +static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker *shrinker, + struct lock_class_key *key) +{ +#ifdef CONFIG_LOCKDEP + lru->key = key; +#endif + return list_lru_init_memcg(lru, shrinker); +} int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); @@ -172,7 +184,7 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head); typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, - struct list_lru_one *list, spinlock_t *lock, void *cb_arg); + struct list_lru_one *list, void *cb_arg); /** * list_lru_walk_one: walk a @lru, isolating and disposing freeable items. diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 1b95fe31051f..c8f0f9458f2c 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -200,7 +200,7 @@ extern const struct svc_procedure nlmsvc_procedures[24]; extern const struct svc_procedure nlmsvc_procedures4[24]; #endif extern int nlmsvc_grace_period; -extern unsigned long nlmsvc_timeout; +extern unsigned long nlm_timeout; extern bool nsm_use_hostnames; extern u32 nsm_local_state; @@ -278,9 +278,9 @@ __be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, int, struct nlm_cookie *, int); __be32 nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *); -__be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, - struct nlm_host *, struct nlm_lock *, - struct nlm_lock *, struct nlm_cookie *); +__be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, + struct nlm_host *host, struct nlm_lock *lock, + struct nlm_lock *conflock); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 80cca9426761..17d53165d9f2 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -73,8 +73,6 @@ struct nlm_args { u32 fsm_mode; }; -typedef struct nlm_args nlm_args; - /* * Generic lockd result */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 217f7abf2cbf..67964dc4db95 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -173,7 +173,7 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + lockdep_init_map_type(&(lock)->dep_map, (lock)->dep_map.name, (lock)->dep_map.key, sub,\ (lock)->dep_map.wait_type_inner, \ (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h index babf4e3c28ba..8f1a9408302f 100644 --- a/include/linux/logic_pio.h +++ b/include/linux/logic_pio.h @@ -17,7 +17,7 @@ enum { struct logic_pio_hwaddr { struct list_head list; - struct fwnode_handle *fwnode; + const struct fwnode_handle *fwnode; resource_size_t hw_start; resource_size_t io_start; resource_size_t size; /* range size populated */ @@ -110,8 +110,8 @@ void logic_outsl(unsigned long addr, const void *buffer, unsigned int count); #endif /* CONFIG_INDIRECT_PIO */ #define MMIO_UPPER_LIMIT (IO_SPACE_LIMIT - PIO_INDIRECT_SIZE) -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode); -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode); +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t hw_addr, resource_size_t size); int logic_pio_register_range(struct logic_pio_hwaddr *newrange); void logic_pio_unregister_range(struct logic_pio_hwaddr *range); diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index c9afcdd9324c..ff82ef85a084 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -119,7 +119,7 @@ write intent log information, three of which are mentioned here. */ /* this defines an element in a tracked set - * .colision is for hash table lookup. + * .collision is for hash table lookup. * When we process a new IO request, we know its sector, thus can deduce the * region number (label) easily. To do the label -> object lookup without a * full list walk, we use a simple hash table. @@ -145,7 +145,7 @@ write intent log information, three of which are mentioned here. * But it avoids high order page allocations in kmalloc. */ struct lc_element { - struct hlist_node colision; + struct hlist_node collision; struct list_head list; /* LRU list or free list */ unsigned refcnt; /* back "pointer" into lc_cache->element[index], diff --git a/include/linux/lsm/apparmor.h b/include/linux/lsm/apparmor.h new file mode 100644 index 000000000000..612cbfacb072 --- /dev/null +++ b/include/linux/lsm/apparmor.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * AppArmor presents single pointer to an aa_label structure. + */ +#ifndef __LINUX_LSM_APPARMOR_H +#define __LINUX_LSM_APPARMOR_H + +struct aa_label; + +struct lsm_prop_apparmor { +#ifdef CONFIG_SECURITY_APPARMOR + struct aa_label *label; +#endif +}; + +#endif /* ! __LINUX_LSM_APPARMOR_H */ diff --git a/include/linux/lsm/bpf.h b/include/linux/lsm/bpf.h new file mode 100644 index 000000000000..8106e206fcef --- /dev/null +++ b/include/linux/lsm/bpf.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * BPF may present a single u32 value. + */ +#ifndef __LINUX_LSM_BPF_H +#define __LINUX_LSM_BPF_H +#include <linux/types.h> + +struct lsm_prop_bpf { +#ifdef CONFIG_BPF_LSM + u32 secid; +#endif +}; + +#endif /* ! __LINUX_LSM_BPF_H */ diff --git a/include/linux/lsm/selinux.h b/include/linux/lsm/selinux.h new file mode 100644 index 000000000000..9455a6b5b910 --- /dev/null +++ b/include/linux/lsm/selinux.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * SELinux presents a single u32 value which is known as a secid. + */ +#ifndef __LINUX_LSM_SELINUX_H +#define __LINUX_LSM_SELINUX_H +#include <linux/types.h> + +struct lsm_prop_selinux { +#ifdef CONFIG_SECURITY_SELINUX + u32 secid; +#endif +}; + +#endif /* ! __LINUX_LSM_SELINUX_H */ diff --git a/include/linux/lsm/smack.h b/include/linux/lsm/smack.h new file mode 100644 index 000000000000..ff730dd7a734 --- /dev/null +++ b/include/linux/lsm/smack.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module interface to other subsystems. + * Smack presents a pointer into the global Smack label list. + */ +#ifndef __LINUX_LSM_SMACK_H +#define __LINUX_LSM_SMACK_H + +struct smack_known; + +struct lsm_prop_smack { +#ifdef CONFIG_SECURITY_SMACK + struct smack_known *skp; +#endif +}; + +#endif /* ! __LINUX_LSM_SMACK_H */ diff --git a/include/linux/lsm_count.h b/include/linux/lsm_count.h new file mode 100644 index 000000000000..16eb49761b25 --- /dev/null +++ b/include/linux/lsm_count.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2023 Google LLC. + */ + +#ifndef __LINUX_LSM_COUNT_H +#define __LINUX_LSM_COUNT_H + +#include <linux/args.h> + +#ifdef CONFIG_SECURITY + +/* + * Macros to count the number of LSMs enabled in the kernel at compile time. + */ + +/* + * Capabilities is enabled when CONFIG_SECURITY is enabled. + */ +#if IS_ENABLED(CONFIG_SECURITY) +#define CAPABILITIES_ENABLED 1, +#else +#define CAPABILITIES_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SELINUX) +#define SELINUX_ENABLED 1, +#else +#define SELINUX_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SMACK) +#define SMACK_ENABLED 1, +#else +#define SMACK_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_APPARMOR) +#define APPARMOR_ENABLED 1, +#else +#define APPARMOR_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_TOMOYO) +#define TOMOYO_ENABLED 1, +#else +#define TOMOYO_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_YAMA) +#define YAMA_ENABLED 1, +#else +#define YAMA_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LOADPIN) +#define LOADPIN_ENABLED 1, +#else +#define LOADPIN_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LOCKDOWN_LSM) +#define LOCKDOWN_ENABLED 1, +#else +#define LOCKDOWN_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_SAFESETID) +#define SAFESETID_ENABLED 1, +#else +#define SAFESETID_ENABLED +#endif + +#if IS_ENABLED(CONFIG_BPF_LSM) +#define BPF_LSM_ENABLED 1, +#else +#define BPF_LSM_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_LANDLOCK) +#define LANDLOCK_ENABLED 1, +#else +#define LANDLOCK_ENABLED +#endif + +#if IS_ENABLED(CONFIG_IMA) +#define IMA_ENABLED 1, +#else +#define IMA_ENABLED +#endif + +#if IS_ENABLED(CONFIG_EVM) +#define EVM_ENABLED 1, +#else +#define EVM_ENABLED +#endif + +#if IS_ENABLED(CONFIG_SECURITY_IPE) +#define IPE_ENABLED 1, +#else +#define IPE_ENABLED +#endif + +/* + * There is a trailing comma that we need to be accounted for. This is done by + * using a skipped argument in __COUNT_LSMS + */ +#define __COUNT_LSMS(skipped_arg, args...) COUNT_ARGS(args...) +#define COUNT_LSMS(args...) __COUNT_LSMS(args) + +#define MAX_LSM_COUNT \ + COUNT_LSMS( \ + CAPABILITIES_ENABLED \ + SELINUX_ENABLED \ + SMACK_ENABLED \ + APPARMOR_ENABLED \ + TOMOYO_ENABLED \ + YAMA_ENABLED \ + LOADPIN_ENABLED \ + LOCKDOWN_ENABLED \ + SAFESETID_ENABLED \ + BPF_LSM_ENABLED \ + LANDLOCK_ENABLED \ + IMA_ENABLED \ + EVM_ENABLED \ + IPE_ENABLED) + +#else + +#define MAX_LSM_COUNT 0 + +#endif /* CONFIG_SECURITY */ + +#endif /* __LINUX_LSM_COUNT_H */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 855db460e08b..eb2937599cb0 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -48,7 +48,7 @@ LSM_HOOK(int, 0, quota_on, struct dentry *dentry) LSM_HOOK(int, 0, syslog, int type) LSM_HOOK(int, 0, settime, const struct timespec64 *ts, const struct timezone *tz) -LSM_HOOK(int, 1, vm_enough_memory, struct mm_struct *mm, long pages) +LSM_HOOK(int, 0, vm_enough_memory, struct mm_struct *mm, long pages) LSM_HOOK(int, 0, bprm_creds_for_exec, struct linux_binprm *bprm) LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, const struct file *file) LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm) @@ -114,6 +114,7 @@ LSM_HOOK(int, 0, path_notify, const struct path *path, u64 mask, unsigned int obj_type) LSM_HOOK(int, 0, inode_alloc_security, struct inode *inode) LSM_HOOK(void, LSM_RET_VOID, inode_free_security, struct inode *inode) +LSM_HOOK(void, LSM_RET_VOID, inode_free_security_rcu, void *inode_security) LSM_HOOK(int, -EOPNOTSUPP, inode_init_security, struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) @@ -175,10 +176,13 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode, const char *name, const void *value, size_t size, int flags) LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) -LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode, + struct lsm_prop *prop) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, const char *name) +LSM_HOOK(int, 0, inode_setintegrity, const struct inode *inode, + enum lsm_integrity_type type, const void *value, size_t size) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) @@ -214,6 +218,8 @@ LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old, LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new, const struct cred *old) LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c, + struct lsm_prop *prop) LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid) LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode) LSM_HOOK(int, 0, kernel_module_request, char *kmod_name) @@ -232,9 +238,9 @@ LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old) LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid) LSM_HOOK(int, 0, task_getpgid, struct task_struct *p) LSM_HOOK(int, 0, task_getsid, struct task_struct *p) -LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj, - struct task_struct *p, u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop) +LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj, + struct task_struct *p, struct lsm_prop *prop) LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice) LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio) LSM_HOOK(int, 0, task_getioprio, struct task_struct *p) @@ -253,8 +259,8 @@ LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p, struct inode *inode) LSM_HOOK(int, 0, userns_create, const struct cred *cred) LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag) -LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp, - u32 *secid) +LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg) LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg) LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm) @@ -291,6 +297,8 @@ LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, + char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) @@ -353,8 +361,7 @@ LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, struct flowi_common *flic) -LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) -LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) +LSM_HOOK(int, 0, tun_dev_alloc_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) LSM_HOOK(int, 0, tun_dev_attach_queue, void *security) LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security) @@ -374,8 +381,7 @@ LSM_HOOK(int, 0, mptcp_add_subflow, struct sock *sk, struct sock *ssk) LSM_HOOK(int, 0, ib_pkey_access, void *sec, u64 subnet_prefix, u16 pkey) LSM_HOOK(int, 0, ib_endport_manage_subnet, void *sec, const char *dev_name, u8 port_num) -LSM_HOOK(int, 0, ib_alloc_security, void **sec) -LSM_HOOK(void, LSM_RET_VOID, ib_free_security, void *sec) +LSM_HOOK(int, 0, ib_alloc_security, void *sec) #endif /* CONFIG_SECURITY_INFINIBAND */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -403,7 +409,6 @@ LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, #ifdef CONFIG_KEYS LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) -LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) @@ -416,7 +421,8 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) -LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) +LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op, + void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) #endif /* CONFIG_AUDIT */ @@ -431,7 +437,7 @@ LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) @@ -442,7 +448,6 @@ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) -LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) #endif /* CONFIG_PERF_EVENTS */ @@ -452,3 +457,10 @@ LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ + +LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) + +LSM_HOOK(int, 0, bdev_alloc_security, struct block_device *bdev) +LSM_HOOK(void, LSM_RET_VOID, bdev_free_security, struct block_device *bdev) +LSM_HOOK(int, 0, bdev_setintegrity, struct block_device *bdev, + enum lsm_integrity_type type, const void *value, size_t size) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index a2ade0ffe9e7..090d1d3e19fe 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -30,19 +30,47 @@ #include <linux/init.h> #include <linux/rculist.h> #include <linux/xattr.h> +#include <linux/static_call.h> +#include <linux/unroll.h> +#include <linux/jump_label.h> +#include <linux/lsm_count.h> union security_list_options { #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); #include "lsm_hook_defs.h" #undef LSM_HOOK + void *lsm_func_addr; }; -struct security_hook_heads { - #define LSM_HOOK(RET, DEFAULT, NAME, ...) struct hlist_head NAME; - #include "lsm_hook_defs.h" - #undef LSM_HOOK +/* + * @key: static call key as defined by STATIC_CALL_KEY + * @trampoline: static call trampoline as defined by STATIC_CALL_TRAMP + * @hl: The security_hook_list as initialized by the owning LSM. + * @active: Enabled when the static call has an LSM hook associated. + */ +struct lsm_static_call { + struct static_call_key *key; + void *trampoline; + struct security_hook_list *hl; + /* this needs to be true or false based on what the key defaults to */ + struct static_key_false *active; } __randomize_layout; +/* + * Table of the static calls for each LSM hook. + * Once the LSMs are initialized, their callbacks will be copied to these + * tables such that the calls are filled backwards (from last to first). + * This way, we can jump directly to the first used static call, and execute + * all of them after. This essentially makes the entry point + * dynamic to adapt the number of static calls to the number of callbacks. + */ +struct lsm_static_calls_table { + #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ + struct lsm_static_call NAME[MAX_LSM_COUNT]; + #include <linux/lsm_hook_defs.h> + #undef LSM_HOOK +} __packed __randomize_layout; + /** * struct lsm_id - Identify a Linux Security Module. * @lsm: name of the LSM, must be approved by the LSM maintainers @@ -51,53 +79,45 @@ struct security_hook_heads { * Contains the information that identifies the LSM. */ struct lsm_id { - const char *name; - u64 id; + const char *name; + u64 id; }; /* * Security module hook list structure. * For use with generic list macros for common operations. + * + * struct security_hook_list - Contents of a cacheable, mappable object. + * @scalls: The beginning of the array of static calls assigned to this hook. + * @hook: The callback for the hook. + * @lsm: The name of the lsm that owns this hook. */ struct security_hook_list { - struct hlist_node list; - struct hlist_head *head; - union security_list_options hook; - const struct lsm_id *lsmid; + struct lsm_static_call *scalls; + union security_list_options hook; + const struct lsm_id *lsmid; } __randomize_layout; /* * Security blob size or offset data. */ struct lsm_blob_sizes { - int lbs_cred; - int lbs_file; - int lbs_inode; - int lbs_superblock; - int lbs_ipc; - int lbs_msg_msg; - int lbs_task; - int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ + int lbs_cred; + int lbs_file; + int lbs_ib; + int lbs_inode; + int lbs_sock; + int lbs_superblock; + int lbs_ipc; + int lbs_key; + int lbs_msg_msg; + int lbs_perf_event; + int lbs_task; + int lbs_xattr_count; /* number of xattr slots in new_xattrs array */ + int lbs_tun_dev; + int lbs_bdev; }; -/** - * lsm_get_xattr_slot - Return the next available slot and increment the index - * @xattrs: array storing LSM-provided xattrs - * @xattr_count: number of already stored xattrs (updated) - * - * Retrieve the first available slot in the @xattrs array to fill with an xattr, - * and increment @xattr_count. - * - * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. - */ -static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, - int *xattr_count) -{ - if (unlikely(!xattrs)) - return NULL; - return &xattrs[(*xattr_count)++]; -} - /* * LSM_RET_VOID is used as the default value in LSM_HOOK definitions for void * LSM hooks (in include/linux/lsm_hook_defs.h). @@ -110,11 +130,11 @@ static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, * care of the common case and reduces the amount of * text involved. */ -#define LSM_HOOK_INIT(HEAD, HOOK) \ - { .head = &security_hook_heads.HEAD, .hook = { .HEAD = HOOK } } - -extern struct security_hook_heads security_hook_heads; -extern char *lsm_names; +#define LSM_HOOK_INIT(NAME, HOOK) \ + { \ + .scalls = static_calls_table.NAME, \ + .hook = { .NAME = HOOK } \ + } extern void security_add_hooks(struct security_hook_list *hooks, int count, const struct lsm_id *lsmid); @@ -137,9 +157,6 @@ struct lsm_info { struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */ }; -extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; -extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; - #define DEFINE_LSM(lsm) \ static struct lsm_info __lsm_##lsm \ __used __section(".lsm_info.init") \ @@ -150,6 +167,28 @@ extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; __used __section(".early_lsm_info.init") \ __aligned(sizeof(unsigned long)) -extern int lsm_inode_alloc(struct inode *inode); +/* DO NOT tamper with these variables outside of the LSM framework */ +extern char *lsm_names; +extern struct lsm_static_calls_table static_calls_table __ro_after_init; +extern struct lsm_info __start_lsm_info[], __end_lsm_info[]; +extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[]; + +/** + * lsm_get_xattr_slot - Return the next available slot and increment the index + * @xattrs: array storing LSM-provided xattrs + * @xattr_count: number of already stored xattrs (updated) + * + * Retrieve the first available slot in the @xattrs array to fill with an xattr, + * and increment @xattr_count. + * + * Return: The slot to fill in @xattrs if non-NULL, NULL otherwise. + */ +static inline struct xattr *lsm_get_xattr_slot(struct xattr *xattrs, + int *xattr_count) +{ + if (unlikely(!xattrs)) + return NULL; + return &xattrs[(*xattr_count)++]; +} #endif /* ! __LINUX_LSM_HOOKS_H */ diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index a53ad4dabd7e..cbbcd18d4186 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -52,9 +52,9 @@ * bit in the node type. This is possible by using bit 1 to indicate if bit 2 * is part of the type or the slot. * - * Once the type is decided, the decision of an allocation range type or a range - * type is done by examining the immutable tree flag for the MAPLE_ALLOC_RANGE - * flag. + * Once the type is decided, the decision of an allocation range type or a + * range type is done by examining the immutable tree flag for the + * MT_FLAGS_ALLOC_RANGE flag. * * Node types: * 0x??1 = Root @@ -148,6 +148,18 @@ enum maple_type { maple_arange_64, }; +enum store_type { + wr_invalid, + wr_new_root, + wr_store_root, + wr_exact_fit, + wr_spanning_store, + wr_split_store, + wr_rebalance, + wr_append, + wr_node_store, + wr_slot_store, +}; /** * DOC: Maple tree flags @@ -212,7 +224,7 @@ typedef struct { /* nothing */ } lockdep_map_p; * (set at tree creation time) and dynamic information set under the spinlock. * * Another use of flags are to indicate global states of the tree. This is the - * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in + * case with the MT_FLAGS_USE_RCU flag, which indicates the tree is currently in * RCU mode. This mode was added to allow the tree to reuse nodes instead of * re-allocating and RCU freeing nodes when there is a single user. */ @@ -436,6 +448,7 @@ struct ma_state { unsigned char offset; unsigned char mas_flags; unsigned char end; /* The end of the node */ + enum store_type store_type; /* The type of store needed for this operation */ }; struct ma_wr_state { @@ -477,6 +490,7 @@ struct ma_wr_state { .max = ULONG_MAX, \ .alloc = NULL, \ .mas_flags = 0, \ + .store_type = wr_invalid, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ @@ -578,6 +592,20 @@ static __always_inline void mas_reset(struct ma_state *mas) #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) +/** + * mas_for_each_rev() - Iterate over a range of the maple tree in reverse order. + * @__mas: Maple Tree operation state (maple_state) + * @__entry: Entry retrieved from the tree + * @__min: minimum index to retrieve from the tree + * + * When returned, mas->index and mas->last will hold the entire range for the + * entry. + * + * Note: may return the zero entry. + */ +#define mas_for_each_rev(__mas, __entry, __min) \ + while (((__entry) = mas_find_rev((__mas), (__min))) != NULL) + #ifdef CONFIG_DEBUG_MAPLE_TREE enum mt_dump_format { mt_dump_dec, diff --git a/include/linux/mdio.h b/include/linux/mdio.h index efeca5bd7600..3c3deac57894 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -165,31 +165,12 @@ extern int mdio_set_flag(const struct mdio_if_info *mdio, bool sense); extern int mdio45_links_ok(const struct mdio_if_info *mdio, u32 mmds); extern int mdio45_nway_restart(const struct mdio_if_info *mdio); -extern void mdio45_ethtool_gset_npage(const struct mdio_if_info *mdio, - struct ethtool_cmd *ecmd, - u32 npage_adv, u32 npage_lpa); extern void mdio45_ethtool_ksettings_get_npage(const struct mdio_if_info *mdio, struct ethtool_link_ksettings *cmd, u32 npage_adv, u32 npage_lpa); /** - * mdio45_ethtool_gset - get settings for ETHTOOL_GSET - * @mdio: MDIO interface - * @ecmd: Ethtool request structure - * - * Since the CSRs for auto-negotiation using next pages are not fully - * standardised, this function does not attempt to decode them. Use - * mdio45_ethtool_gset_npage() to specify advertisement bits from next - * pages. - */ -static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio, - struct ethtool_cmd *ecmd) -{ - mdio45_ethtool_gset_npage(mdio, ecmd, 0, 0); -} - -/** * mdio45_ethtool_ksettings_get - get settings for ETHTOOL_GLINKSETTINGS * @mdio: MDIO interface * @cmd: Ethtool request structure diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fc4d75c6cec3..673d5cae7c81 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -467,6 +467,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0e5bf25d324f..5502aa8e138e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -57,7 +57,7 @@ enum memcg_memory_event { struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; - unsigned int generation; + int generation; }; #ifdef CONFIG_MEMCG @@ -70,6 +70,7 @@ struct mem_cgroup_id { }; struct memcg_vmstats_percpu; +struct memcg1_events_percpu; struct memcg_vmstats; struct lruvec_stats_percpu; struct lruvec_stats; @@ -77,7 +78,7 @@ struct lruvec_stats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; /* scan generation, increased every round-trip */ - unsigned int generation; + atomic_t generation; }; /* @@ -193,6 +194,11 @@ struct mem_cgroup { struct page_counter memsw; /* v1 only */ }; + /* registered local peak watchers */ + struct list_head memory_peaks; + struct list_head swap_peaks; + spinlock_t peaks_lock; + /* Range enforcement for interrupt charges */ struct work_struct high_work; @@ -270,6 +276,8 @@ struct mem_cgroup { struct page_counter kmem; /* v1 only */ struct page_counter tcpmem; /* v1 only */ + struct memcg1_events_percpu __percpu *events_percpu; + unsigned long soft_limit; /* protected by memcg_oom_lock */ @@ -291,25 +299,10 @@ struct mem_cgroup { /* For oom notifier event fd */ struct list_head oom_notify; - /* - * Should we move charges of a task when a task is moved into this - * mem_cgroup ? And what type of charges should we move ? - */ - unsigned long move_charge_at_immigrate; - /* taken only while moving_account > 0 */ - spinlock_t move_lock; - unsigned long move_lock_flags; - /* Legacy tcp memory accounting */ bool tcpmem_active; int tcpmem_pressure; - /* - * set > 0 if pages under this cgroup are moving to other cgroup. - */ - atomic_t moving_account; - struct task_struct *move_lock_task; - /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; @@ -361,11 +354,11 @@ static inline bool folio_memcg_kmem(struct folio *folio); * After the initialization objcg->memcg is always pointing at * a valid memcg, but can be atomically swapped to the parent memcg. * - * The caller must ensure that the returned memcg won't be released: - * e.g. acquire the rcu_read_lock or css_set_lock. + * The caller must ensure that the returned memcg won't be released. */ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) { + lockdep_assert_once(rcu_read_lock_held() || lockdep_is_held(&cgroup_mutex)); return READ_ONCE(objcg->memcg); } @@ -425,9 +418,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) * * - the folio lock * - LRU isolation - * - folio_memcg_lock() * - exclusive reference - * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -439,32 +430,17 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return __folio_memcg(folio); } -/** - * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. +/* + * folio_memcg_charged - If a folio is charged to a memory cgroup. * @folio: Pointer to the folio. * - * This function assumes that the folio is known to have a - * proper memory cgroup pointer. It's not safe to call this function - * against some type of folios, e.g. slab folios or ex-slab folios. - * - * Return: A pointer to the memory cgroup associated with the folio, - * or NULL. + * Returns true if folio is charged to a memory cgroup, otherwise returns false. */ -static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) +static inline bool folio_memcg_charged(struct folio *folio) { - unsigned long memcg_data = READ_ONCE(folio->memcg_data); - - VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - WARN_ON_ONCE(!rcu_read_lock_held()); - - if (memcg_data & MEMCG_DATA_KMEM) { - struct obj_cgroup *objcg; - - objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); - return obj_cgroup_memcg(objcg); - } - - return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); + if (folio_memcg_kmem(folio)) + return __folio_objcg(folio) != NULL; + return __folio_memcg(folio) != NULL; } /* @@ -482,9 +458,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) * * - the folio lock * - LRU isolation - * - lock_folio_memcg() * - exclusive reference - * - mem_cgroup_trylock_pages() * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released. @@ -677,7 +651,8 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); + +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); void __mem_cgroup_uncharge(struct folio *folio); @@ -762,6 +737,8 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); struct mem_cgroup *get_mem_cgroup_from_current(void); +struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -1006,8 +983,8 @@ static inline void count_memcg_folio_events(struct folio *folio, count_memcg_events(memcg, idx, nr); } -static inline void count_memcg_event_mm(struct mm_struct *mm, - enum vm_event_item idx) +static inline void count_memcg_events_mm(struct mm_struct *mm, + enum vm_event_item idx, unsigned long count) { struct mem_cgroup *memcg; @@ -1017,10 +994,16 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); if (likely(memcg)) - count_memcg_events(memcg, idx, 1); + count_memcg_events(memcg, idx, count); rcu_read_unlock(); } +static inline void count_memcg_event_mm(struct mm_struct *mm, + enum vm_event_item idx) +{ + count_memcg_events_mm(mm, idx, 1); +} + static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { @@ -1072,10 +1055,9 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return NULL; } -static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) +static inline bool folio_memcg_charged(struct folio *folio) { - WARN_ON_ONCE(!rcu_read_lock_held()); - return NULL; + return false; } static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) @@ -1176,7 +1158,7 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) { } @@ -1240,12 +1222,21 @@ static inline struct mem_cgroup *get_mem_cgroup_from_current(void) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { return NULL; } +static inline void obj_cgroup_get(struct obj_cgroup *objcg) +{ +} + static inline void obj_cgroup_put(struct obj_cgroup *objcg) { } @@ -1462,6 +1453,11 @@ static inline void count_memcg_folio_events(struct folio *folio, { } +static inline void count_memcg_events_mm(struct mm_struct *mm, + enum vm_event_item idx, unsigned long count) +{ +} + static inline void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { @@ -1717,11 +1713,11 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) return memcg ? memcg->kmemcg_id : -1; } -struct mem_cgroup *mem_cgroup_from_obj(void *p); struct mem_cgroup *mem_cgroup_from_slab_obj(void *p); -static inline void count_objcg_event(struct obj_cgroup *objcg, - enum vm_event_item idx) +static inline void count_objcg_events(struct obj_cgroup *objcg, + enum vm_event_item idx, + unsigned long count) { struct mem_cgroup *memcg; @@ -1730,7 +1726,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, rcu_read_lock(); memcg = obj_cgroup_memcg(objcg); - count_memcg_events(memcg, idx, 1); + count_memcg_events(memcg, idx, count); rcu_read_unlock(); } @@ -1780,18 +1776,14 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg) return -1; } -static inline struct mem_cgroup *mem_cgroup_from_obj(void *p) -{ - return NULL; -} - static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p) { return NULL; } -static inline void count_objcg_event(struct obj_cgroup *objcg, - enum vm_event_item idx) +static inline void count_objcg_events(struct obj_cgroup *objcg, + enum vm_event_item idx, + unsigned long count) { } @@ -1837,26 +1829,6 @@ static inline bool task_in_memcg_oom(struct task_struct *p) return p->memcg_in_oom; } -void folio_memcg_lock(struct folio *folio); -void folio_memcg_unlock(struct folio *folio); - -/* try to stablize folio_memcg() for all the pages in a memcg */ -static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -{ - rcu_read_lock(); - - if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) - return true; - - rcu_read_unlock(); - return false; -} - -static inline void mem_cgroup_unlock_pages(void) -{ - rcu_read_unlock(); -} - static inline void mem_cgroup_enter_user_fault(void) { WARN_ON(current->in_user_fault); @@ -1878,26 +1850,6 @@ unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, return 0; } -static inline void folio_memcg_lock(struct folio *folio) -{ -} - -static inline void folio_memcg_unlock(struct folio *folio) -{ -} - -static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -{ - /* to match folio_memcg_rcu() */ - rcu_read_lock(); - return true; -} - -static inline void mem_cgroup_unlock_pages(void) -{ - rcu_read_unlock(); -} - static inline bool task_in_memcg_oom(struct task_struct *p) { return false; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ebe876930e78..b27ddce5d324 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -16,54 +16,6 @@ struct resource; struct vmem_altmap; struct dev_pagemap; -#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION -/* - * For supporting node-hotadd, we have to allocate a new pgdat. - * - * If an arch has generic style NODE_DATA(), - * node_data[nid] = kzalloc() works well. But it depends on the architecture. - * - * In general, generic_alloc_nodedata() is used. - * - */ -extern pg_data_t *arch_alloc_nodedata(int nid); -extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat); - -#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - -#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid) - -#ifdef CONFIG_NUMA -/* - * XXX: node aware allocation can't work well to get new node's memory at this time. - * Because, pgdat for the new node is not allocated/initialized yet itself. - * To use new node's memory, more consideration will be necessary. - */ -#define generic_alloc_nodedata(nid) \ -({ \ - memblock_alloc(sizeof(*pgdat), SMP_CACHE_BYTES); \ -}) - -extern pg_data_t *node_data[]; -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ - node_data[nid] = pgdat; -} - -#else /* !CONFIG_NUMA */ - -/* never called */ -static inline pg_data_t *generic_alloc_nodedata(int nid) -{ - BUG(); - return NULL; -} -static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat) -{ -} -#endif /* CONFIG_NUMA */ -#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */ - #ifdef CONFIG_MEMORY_HOTPLUG struct page *pfn_to_online_page(unsigned long pfn); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 1add16f21612..ce9885e0178a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -47,7 +47,7 @@ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ - nodemask_t nodes; /* interleave/bind/perfer */ + nodemask_t nodes; /* interleave/bind/preferred/etc */ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { diff --git a/include/linux/memstick.h b/include/linux/memstick.h index ebf73d4ee969..107bdcbedf79 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -293,7 +293,7 @@ struct memstick_host { }; struct memstick_driver { - struct memstick_device_id *id_table; + const struct memstick_device_id *id_table; int (*probe)(struct memstick_dev *card); void (*remove)(struct memstick_dev *card); int (*suspend)(struct memstick_dev *card, diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index def5df6e74bf..551ef1c367d6 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -294,7 +294,7 @@ struct pm80x_chip { struct i2c_client *client; struct i2c_client *companion; struct regmap *regmap; - struct regmap_irq_chip *regmap_irq_chip; + const struct regmap_irq_chip *regmap_irq_chip; struct regmap_irq_chip_data *irq_data; int type; int irq; diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h index 133aa302e492..85eca44f39ab 100644 --- a/include/linux/mfd/88pm886.h +++ b/include/linux/mfd/88pm886.h @@ -31,6 +31,15 @@ #define PM886_INT_WC BIT(1) #define PM886_INT_MASK_MODE BIT(2) +#define PM886_REG_RTC_CNT1 0xd1 +#define PM886_REG_RTC_CNT2 0xd2 +#define PM886_REG_RTC_CNT3 0xd3 +#define PM886_REG_RTC_CNT4 0xd4 +#define PM886_REG_RTC_SPARE1 0xea +#define PM886_REG_RTC_SPARE2 0xeb +#define PM886_REG_RTC_SPARE3 0xec +#define PM886_REG_RTC_SPARE4 0xed +#define PM886_REG_RTC_SPARE5 0xee #define PM886_REG_RTC_SPARE6 0xef #define PM886_REG_BUCK_EN 0x08 diff --git a/include/linux/mfd/adp5585.h b/include/linux/mfd/adp5585.h new file mode 100644 index 000000000000..016033cd68e4 --- /dev/null +++ b/include/linux/mfd/adp5585.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Analog Devices ADP5585 I/O expander, PWM controller and keypad controller + * + * Copyright 2022 NXP + * Copyright 2024 Ideas on Board Oy + */ + +#ifndef __MFD_ADP5585_H_ +#define __MFD_ADP5585_H_ + +#include <linux/bits.h> + +#define ADP5585_ID 0x00 +#define ADP5585_MAN_ID_VALUE 0x20 +#define ADP5585_MAN_ID_MASK GENMASK(7, 4) +#define ADP5585_INT_STATUS 0x01 +#define ADP5585_STATUS 0x02 +#define ADP5585_FIFO_1 0x03 +#define ADP5585_FIFO_2 0x04 +#define ADP5585_FIFO_3 0x05 +#define ADP5585_FIFO_4 0x06 +#define ADP5585_FIFO_5 0x07 +#define ADP5585_FIFO_6 0x08 +#define ADP5585_FIFO_7 0x09 +#define ADP5585_FIFO_8 0x0a +#define ADP5585_FIFO_9 0x0b +#define ADP5585_FIFO_10 0x0c +#define ADP5585_FIFO_11 0x0d +#define ADP5585_FIFO_12 0x0e +#define ADP5585_FIFO_13 0x0f +#define ADP5585_FIFO_14 0x10 +#define ADP5585_FIFO_15 0x11 +#define ADP5585_FIFO_16 0x12 +#define ADP5585_GPI_INT_STAT_A 0x13 +#define ADP5585_GPI_INT_STAT_B 0x14 +#define ADP5585_GPI_STATUS_A 0x15 +#define ADP5585_GPI_STATUS_B 0x16 +#define ADP5585_RPULL_CONFIG_A 0x17 +#define ADP5585_RPULL_CONFIG_B 0x18 +#define ADP5585_RPULL_CONFIG_C 0x19 +#define ADP5585_RPULL_CONFIG_D 0x1a +#define ADP5585_Rx_PULL_CFG_PU_300K 0 +#define ADP5585_Rx_PULL_CFG_PD_300K 1 +#define ADP5585_Rx_PULL_CFG_PU_100K 2 +#define ADP5585_Rx_PULL_CFG_DISABLE 3 +#define ADP5585_Rx_PULL_CFG_MASK 3 +#define ADP5585_GPI_INT_LEVEL_A 0x1b +#define ADP5585_GPI_INT_LEVEL_B 0x1c +#define ADP5585_GPI_EVENT_EN_A 0x1d +#define ADP5585_GPI_EVENT_EN_B 0x1e +#define ADP5585_GPI_INTERRUPT_EN_A 0x1f +#define ADP5585_GPI_INTERRUPT_EN_B 0x20 +#define ADP5585_DEBOUNCE_DIS_A 0x21 +#define ADP5585_DEBOUNCE_DIS_B 0x22 +#define ADP5585_GPO_DATA_OUT_A 0x23 +#define ADP5585_GPO_DATA_OUT_B 0x24 +#define ADP5585_GPO_OUT_MODE_A 0x25 +#define ADP5585_GPO_OUT_MODE_B 0x26 +#define ADP5585_GPIO_DIRECTION_A 0x27 +#define ADP5585_GPIO_DIRECTION_B 0x28 +#define ADP5585_RESET1_EVENT_A 0x29 +#define ADP5585_RESET1_EVENT_B 0x2a +#define ADP5585_RESET1_EVENT_C 0x2b +#define ADP5585_RESET2_EVENT_A 0x2c +#define ADP5585_RESET2_EVENT_B 0x2d +#define ADP5585_RESET_CFG 0x2e +#define ADP5585_PWM_OFFT_LOW 0x2f +#define ADP5585_PWM_OFFT_HIGH 0x30 +#define ADP5585_PWM_ONT_LOW 0x31 +#define ADP5585_PWM_ONT_HIGH 0x32 +#define ADP5585_PWM_CFG 0x33 +#define ADP5585_PWM_IN_AND BIT(2) +#define ADP5585_PWM_MODE BIT(1) +#define ADP5585_PWM_EN BIT(0) +#define ADP5585_LOGIC_CFG 0x34 +#define ADP5585_LOGIC_FF_CFG 0x35 +#define ADP5585_LOGIC_INT_EVENT_EN 0x36 +#define ADP5585_POLL_PTIME_CFG 0x37 +#define ADP5585_PIN_CONFIG_A 0x38 +#define ADP5585_PIN_CONFIG_B 0x39 +#define ADP5585_PIN_CONFIG_C 0x3a +#define ADP5585_PULL_SELECT BIT(7) +#define ADP5585_C4_EXTEND_CFG_GPIO11 (0U << 6) +#define ADP5585_C4_EXTEND_CFG_RESET2 (1U << 6) +#define ADP5585_C4_EXTEND_CFG_MASK GENMASK(6, 6) +#define ADP5585_R4_EXTEND_CFG_GPIO5 (0U << 5) +#define ADP5585_R4_EXTEND_CFG_RESET1 (1U << 5) +#define ADP5585_R4_EXTEND_CFG_MASK GENMASK(5, 5) +#define ADP5585_R3_EXTEND_CFG_GPIO4 (0U << 2) +#define ADP5585_R3_EXTEND_CFG_LC (1U << 2) +#define ADP5585_R3_EXTEND_CFG_PWM_OUT (2U << 2) +#define ADP5585_R3_EXTEND_CFG_MASK GENMASK(3, 2) +#define ADP5585_R0_EXTEND_CFG_GPIO1 (0U << 0) +#define ADP5585_R0_EXTEND_CFG_LY (1U << 0) +#define ADP5585_R0_EXTEND_CFG_MASK GENMASK(0, 0) +#define ADP5585_GENERAL_CFG 0x3b +#define ADP5585_OSC_EN BIT(7) +#define ADP5585_OSC_FREQ_50KHZ (0U << 5) +#define ADP5585_OSC_FREQ_100KHZ (1U << 5) +#define ADP5585_OSC_FREQ_200KHZ (2U << 5) +#define ADP5585_OSC_FREQ_500KHZ (3U << 5) +#define ADP5585_OSC_FREQ_MASK GENMASK(6, 5) +#define ADP5585_INT_CFG BIT(1) +#define ADP5585_RST_CFG BIT(0) +#define ADP5585_INT_EN 0x3c + +#define ADP5585_MAX_REG ADP5585_INT_EN + +/* + * Bank 0 covers pins "GPIO 1/R0" to "GPIO 6/R5", numbered 0 to 5 by the + * driver, and bank 1 covers pins "GPIO 7/C0" to "GPIO 11/C4", numbered 6 to + * 10. Some variants of the ADP5585 don't support "GPIO 6/R5". As the driver + * uses identical GPIO numbering for all variants to avoid confusion, GPIO 5 is + * marked as reserved in the device tree for variants that don't support it. + */ +#define ADP5585_BANK(n) ((n) >= 6 ? 1 : 0) +#define ADP5585_BIT(n) ((n) >= 6 ? BIT((n) - 6) : BIT(n)) + +struct regmap; + +struct adp5585_dev { + struct regmap *regmap; +}; + +#endif diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 8c0a33a2e9ce..c3df0e615fbf 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -19,6 +19,7 @@ enum axp20x_variants { AXP223_ID, AXP288_ID, AXP313A_ID, + AXP323_ID, AXP717_ID, AXP803_ID, AXP806_ID, @@ -113,8 +114,19 @@ enum axp20x_variants { #define AXP313A_SHUTDOWN_CTRL 0x1a #define AXP313A_IRQ_EN 0x20 #define AXP313A_IRQ_STATE 0x21 +#define AXP323_DCDC_MODE_CTRL2 0x22 #define AXP717_ON_INDICATE 0x00 +#define AXP717_PMU_STATUS_2 0x01 +#define AXP717_BC_DETECT 0x05 +#define AXP717_PMU_FAULT 0x08 +#define AXP717_MODULE_EN_CONTROL_1 0x0b +#define AXP717_MIN_SYS_V_CONTROL 0x15 +#define AXP717_INPUT_VOL_LIMIT_CTRL 0x16 +#define AXP717_INPUT_CUR_LIMIT_CTRL 0x17 +#define AXP717_MODULE_EN_CONTROL_2 0x19 +#define AXP717_BOOST_CONTROL 0x1e +#define AXP717_VSYS_V_POWEROFF 0x24 #define AXP717_IRQ0_EN 0x40 #define AXP717_IRQ1_EN 0x41 #define AXP717_IRQ2_EN 0x42 @@ -125,6 +137,9 @@ enum axp20x_variants { #define AXP717_IRQ2_STATE 0x4a #define AXP717_IRQ3_STATE 0x4b #define AXP717_IRQ4_STATE 0x4c +#define AXP717_ICC_CHG_SET 0x62 +#define AXP717_ITERM_CHG_SET 0x63 +#define AXP717_CV_CHG_SET 0x64 #define AXP717_DCDC_OUTPUT_CONTROL 0x80 #define AXP717_DCDC1_CONTROL 0x83 #define AXP717_DCDC2_CONTROL 0x84 @@ -145,6 +160,19 @@ enum axp20x_variants { #define AXP717_CLDO3_CONTROL 0x9d #define AXP717_CLDO4_CONTROL 0x9e #define AXP717_CPUSLDO_CONTROL 0x9f +#define AXP717_BATT_PERCENT_DATA 0xa4 +#define AXP717_ADC_CH_EN_CONTROL 0xc0 +#define AXP717_BATT_V_H 0xc4 +#define AXP717_BATT_V_L 0xc5 +#define AXP717_VBUS_V_H 0xc6 +#define AXP717_VBUS_V_L 0xc7 +#define AXP717_VSYS_V_H 0xc8 +#define AXP717_VSYS_V_L 0xc9 +#define AXP717_BATT_CHRG_I_H 0xca +#define AXP717_BATT_CHRG_I_L 0xcb +#define AXP717_ADC_DATA_SEL 0xcd +#define AXP717_ADC_DATA_H 0xce +#define AXP717_ADC_DATA_L 0xcf #define AXP806_STARTUP_SRC 0x00 #define AXP806_CHIP_ID 0x03 @@ -484,6 +512,7 @@ enum { AXP717_CLDO3, AXP717_CLDO4, AXP717_CPUSLDO, + AXP717_BOOST, AXP717_REG_ID_MAX, }; @@ -932,7 +961,7 @@ struct axp20x_dev { unsigned long irq_flags; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irqc; - long variant; + enum axp20x_variants variant; int nr_cells; const struct mfd_cell *cells; const struct regmap_config *regmap_cfg; diff --git a/include/linux/mfd/cgbc.h b/include/linux/mfd/cgbc.h new file mode 100644 index 000000000000..badbec4c7033 --- /dev/null +++ b/include/linux/mfd/cgbc.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Congatec Board Controller driver definitions + * + * Copyright (C) 2024 Bootlin + * Author: Thomas Richard <thomas.richard@bootlin.com> + */ + +#ifndef _LINUX_MFD_CGBC_H_ + +/** + * struct cgbc_version - Board Controller device version structure + * @feature: Board Controller feature number + * @major: Board Controller major revision + * @minor: Board Controller minor revision + */ +struct cgbc_version { + unsigned char feature; + unsigned char major; + unsigned char minor; +}; + +/** + * struct cgbc_device_data - Internal representation of the Board Controller device + * @io_session: Pointer to the session IO memory + * @io_cmd: Pointer to the command IO memory + * @session: Session id returned by the Board Controller + * @dev: Pointer to kernel device structure + * @cgbc_version: Board Controller version structure + * @mutex: Board Controller mutex + */ +struct cgbc_device_data { + void __iomem *io_session; + void __iomem *io_cmd; + u8 session; + struct device *dev; + struct cgbc_version version; + struct mutex lock; +}; + +int cgbc_command(struct cgbc_device_data *cgbc, void *cmd, unsigned int cmd_size, + void *data, unsigned int data_size, u8 *status); + +#endif /*_LINUX_MFD_CGBC_H_*/ diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 8db52324f416..eae82f421414 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -78,6 +78,7 @@ struct da9063 { enum da9063_type type; unsigned char variant_code; unsigned int flags; + bool use_sw_pm; /* Control interface */ struct regmap *regmap; diff --git a/include/linux/mfd/ds1wm.h b/include/linux/mfd/ds1wm.h deleted file mode 100644 index 43dfca1c9702..000000000000 --- a/include/linux/mfd/ds1wm.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* MFD cell driver data for the DS1WM driver - * - * to be defined in the MFD device that is - * using this driver for one of his sub devices - */ - -struct ds1wm_driver_data { - int active_high; - int clock_rate; - /* in milliseconds, the amount of time to - * sleep following a reset pulse. Zero - * should work if your bus devices recover - * time respects the 1-wire spec since the - * ds1wm implements the precise timings of - * a reset pulse/presence detect sequence. - */ - unsigned int reset_recover_delay; - - /* Say 1 here for big endian Hardware - * (only relevant with bus-shift > 0 - */ - bool is_hw_big_endian; - - /* left shift of register number to get register address offsett. - * Only 0,1,2 allowed for 8,16 or 32 bit bus width respectively - */ - unsigned int bus_shift; -}; diff --git a/include/linux/mfd/max5970.h b/include/linux/mfd/max5970.h index 762a7d40c843..fc50e89edfaa 100644 --- a/include/linux/mfd/max5970.h +++ b/include/linux/mfd/max5970.h @@ -16,18 +16,6 @@ #define MAX5978_NUM_SWITCHES 1 #define MAX5970_NUM_LEDS 4 -struct max5970_data { - int num_switches; - u32 irng[MAX5970_NUM_SWITCHES]; - u32 mon_rng[MAX5970_NUM_SWITCHES]; - u32 shunt_micro_ohms[MAX5970_NUM_SWITCHES]; -}; - -enum max5970_chip_type { - TYPE_MAX5978 = 1, - TYPE_MAX5970, -}; - #define MAX5970_REG_CURRENT_L(ch) (0x01 + (ch) * 4) #define MAX5970_REG_CURRENT_H(ch) (0x00 + (ch) * 4) #define MAX5970_REG_VOLTAGE_L(ch) (0x03 + (ch) * 4) diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 54444ff2a5de..c324d548619e 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -217,6 +217,10 @@ enum max77693_charger_battery_state { #define CHG_CNFG_01_CHGRSTRT_MASK (0x3 << CHG_CNFG_01_CHGRSTRT_SHIFT) #define CHG_CNFG_01_PQEN_MAKS BIT(CHG_CNFG_01_PQEN_SHIFT) +/* MAX77693_CHG_REG_CHG_CNFG_02 register */ +#define CHG_CNFG_02_CC_SHIFT 0 +#define CHG_CNFG_02_CC_MASK 0x3F + /* MAX77693_CHG_REG_CHG_CNFG_03 register */ #define CHG_CNFG_03_TOITH_SHIFT 0 #define CHG_CNFG_03_TOTIME_SHIFT 3 @@ -244,6 +248,7 @@ enum max77693_charger_battery_state { #define CHG_CNFG_12_VCHGINREG_MASK (0x3 << CHG_CNFG_12_VCHGINREG_SHIFT) /* MAX77693 CHG_CNFG_09 Register */ +#define CHG_CNFG_09_CHGIN_ILIM_SHIFT 0 #define CHG_CNFG_09_CHGIN_ILIM_MASK 0x7F /* MAX77693 CHG_CTRL Register */ @@ -414,17 +419,6 @@ enum max77693_haptic_reg { #define MAX77693_CONFIG2_MEN 6 #define MAX77693_CONFIG2_HTYP 5 -enum max77693_irq_source { - LED_INT = 0, - TOPSYS_INT, - CHG_INT, - MUIC_INT1, - MUIC_INT2, - MUIC_INT3, - - MAX77693_IRQ_GROUP_NR, -}; - #define SRC_IRQ_CHARGER BIT(0) #define SRC_IRQ_TOP BIT(1) #define SRC_IRQ_FLASH BIT(2) diff --git a/include/linux/mfd/mt6328/core.h b/include/linux/mfd/mt6328/core.h new file mode 100644 index 000000000000..9a08aed72b9f --- /dev/null +++ b/include/linux/mfd/mt6328/core.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015 MediaTek Inc. + * Copyright (c) 2022 Yassine Oudjana <y.oudjana@protonmail.com> + */ + +#ifndef __MFD_MT6328_CORE_H__ +#define __MFD_MT6328_CORE_H__ + +enum mt6328_irq_status_numbers { + MT6328_IRQ_STATUS_PWRKEY = 0, + MT6328_IRQ_STATUS_HOMEKEY, + MT6328_IRQ_STATUS_PWRKEY_R, + MT6328_IRQ_STATUS_HOMEKEY_R, + MT6328_IRQ_STATUS_THR_H, + MT6328_IRQ_STATUS_THR_L, + MT6328_IRQ_STATUS_BAT_H, + MT6328_IRQ_STATUS_BAT_L, + MT6328_IRQ_STATUS_RTC, + MT6328_IRQ_STATUS_AUDIO, + MT6328_IRQ_STATUS_ACCDET, + MT6328_IRQ_STATUS_ACCDET_EINT, + MT6328_IRQ_STATUS_ACCDET_NEGV, + MT6328_IRQ_STATUS_NI_LBAT_INT, + MT6328_IRQ_STATUS_VPROC_OC = 16, + MT6328_IRQ_STATUS_VSYS_OC, + MT6328_IRQ_STATUS_VLTE_OC, + MT6328_IRQ_STATUS_VCORE_OC, + MT6328_IRQ_STATUS_VPA_OC, + MT6328_IRQ_STATUS_LDO_OC, + MT6328_IRQ_STATUS_BAT2_H, + MT6328_IRQ_STATUS_BAT2_L, + MT6328_IRQ_STATUS_VISMPS0_H, + MT6328_IRQ_STATUS_VISMPS0_L, + MT6328_IRQ_STATUS_AUXADC_IMP, + MT6328_IRQ_STATUS_OV = 32, + MT6328_IRQ_STATUS_BVALID_DET, + MT6328_IRQ_STATUS_VBATON_HV, + MT6328_IRQ_STATUS_VBATON_UNDET, + MT6328_IRQ_STATUS_WATCHDOG, + MT6328_IRQ_STATUS_PCHR_CM_VDEC, + MT6328_IRQ_STATUS_CHRDET, + MT6328_IRQ_STATUS_PCHR_CM_VINC, + MT6328_IRQ_STATUS_FG_BAT_H, + MT6328_IRQ_STATUS_FG_BAT_L, + MT6328_IRQ_STATUS_FG_CUR_H, + MT6328_IRQ_STATUS_FG_CUR_L, + MT6328_IRQ_STATUS_FG_ZCV, + MT6328_IRQ_STATUS_SPKL_D, + MT6328_IRQ_STATUS_SPKL_AB, +}; + +#endif /* __MFD_MT6323_CORE_H__ */ diff --git a/include/linux/mfd/mt6328/registers.h b/include/linux/mfd/mt6328/registers.h new file mode 100644 index 000000000000..8199aaea27b9 --- /dev/null +++ b/include/linux/mfd/mt6328/registers.h @@ -0,0 +1,822 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Yassine Oudjana <y.oudjana@protonmail.com> + */ + +#ifndef __MFD_MT6328_REGISTERS_H__ +#define __MFD_MT6328_REGISTERS_H__ + +/* PMIC Registers */ +#define MT6328_STRUP_CON0 0x0000 +#define MT6328_STRUP_CON2 0x0002 +#define MT6328_STRUP_CON3 0x0004 +#define MT6328_STRUP_CON4 0x0006 +#define MT6328_STRUP_CON5 0x0008 +#define MT6328_STRUP_CON6 0x000a +#define MT6328_STRUP_CON7 0x000c +#define MT6328_STRUP_CON8 0x000e +#define MT6328_STRUP_CON9 0x0010 +#define MT6328_STRUP_CON10 0x0012 +#define MT6328_STRUP_CON11 0x0014 +#define MT6328_STRUP_CON12 0x0016 +#define MT6328_STRUP_CON13 0x0018 +#define MT6328_STRUP_CON14 0x001a +#define MT6328_STRUP_CON15 0x001c +#define MT6328_STRUP_CON16 0x001e +#define MT6328_STRUP_CON17 0x0020 +#define MT6328_STRUP_CON18 0x0022 +#define MT6328_STRUP_CON19 0x0024 +#define MT6328_STRUP_CON20 0x0026 +#define MT6328_STRUP_CON21 0x0028 +#define MT6328_STRUP_CON22 0x002a +#define MT6328_STRUP_CON23 0x002c +#define MT6328_STRUP_CON24 0x002e +#define MT6328_STRUP_CON25 0x0030 +#define MT6328_STRUP_CON26 0x0032 +#define MT6328_STRUP_CON27 0x0034 +#define MT6328_STRUP_CON28 0x0036 +#define MT6328_STRUP_CON29 0x0038 +#define MT6328_STRUP_CON30 0x003a +#define MT6328_STRUP_CON31 0x003c +#define MT6328_STRUP_CON32 0x003e +#define MT6328_STRUP_ANA_CON0 0x0040 +#define MT6328_HWCID 0x0200 +#define MT6328_SWCID 0x0202 +#define MT6328_TOP_CON 0x0204 +#define MT6328_TEST_OUT 0x0206 +#define MT6328_TEST_CON0 0x0208 +#define MT6328_TEST_CON1 0x020a +#define MT6328_TESTMODE_SW 0x020c +#define MT6328_EN_STATUS0 0x020e +#define MT6328_EN_STATUS1 0x0210 +#define MT6328_EN_STATUS2 0x0212 +#define MT6328_OCSTATUS0 0x0214 +#define MT6328_OCSTATUS1 0x0216 +#define MT6328_OCSTATUS2 0x0218 +#define MT6328_PGDEBSTATUS 0x021a +#define MT6328_PGSTATUS 0x021c +#define MT6328_THERMALSTATUS 0x021e +#define MT6328_TOPSTATUS 0x0220 +#define MT6328_TDSEL_CON 0x0222 +#define MT6328_RDSEL_CON 0x0224 +#define MT6328_SMT_CON0 0x0226 +#define MT6328_SMT_CON1 0x0228 +#define MT6328_SMT_CON2 0x022a +#define MT6328_DRV_CON0 0x022c +#define MT6328_DRV_CON1 0x022e +#define MT6328_DRV_CON2 0x0230 +#define MT6328_DRV_CON3 0x0232 +#define MT6328_TOP_STATUS 0x0234 +#define MT6328_TOP_STATUS_SET 0x0236 +#define MT6328_TOP_STATUS_CLR 0x0238 +#define MT6328_RGS_ANA_MON 0x023a +#define MT6328_TOP_CKPDN_CON0 0x023c +#define MT6328_TOP_CKPDN_CON0_SET 0x023e +#define MT6328_TOP_CKPDN_CON0_CLR 0x0240 +#define MT6328_TOP_CKPDN_CON1 0x0242 +#define MT6328_TOP_CKPDN_CON1_SET 0x0244 +#define MT6328_TOP_CKPDN_CON1_CLR 0x0246 +#define MT6328_TOP_CKPDN_CON2 0x0248 +#define MT6328_TOP_CKPDN_CON2_SET 0x024a +#define MT6328_TOP_CKPDN_CON2_CLR 0x024c +#define MT6328_TOP_CKPDN_CON3 0x024e +#define MT6328_TOP_CKPDN_CON3_SET 0x0250 +#define MT6328_TOP_CKPDN_CON3_CLR 0x0252 +#define MT6328_TOP_CKPDN_CON4 0x0254 +#define MT6328_TOP_CKPDN_CON4_SET 0x0256 +#define MT6328_TOP_CKPDN_CON4_CLR 0x0258 +#define MT6328_TOP_CKSEL_CON0 0x025a +#define MT6328_TOP_CKSEL_CON0_SET 0x025c +#define MT6328_TOP_CKSEL_CON0_CLR 0x025e +#define MT6328_TOP_CKSEL_CON1 0x0260 +#define MT6328_TOP_CKSEL_CON1_SET 0x0262 +#define MT6328_TOP_CKSEL_CON1_CLR 0x0264 +#define MT6328_TOP_CKSEL_CON2 0x0266 +#define MT6328_TOP_CKSEL_CON2_SET 0x0268 +#define MT6328_TOP_CKSEL_CON2_CLR 0x026a +#define MT6328_TOP_CKDIVSEL_CON0 0x026c +#define MT6328_TOP_CKDIVSEL_CON0_SET 0x026e +#define MT6328_TOP_CKDIVSEL_CON0_CLR 0x0270 +#define MT6328_TOP_CKDIVSEL_CON1 0x0272 +#define MT6328_TOP_CKDIVSEL_CON1_SET 0x0274 +#define MT6328_TOP_CKDIVSEL_CON1_CLR 0x0276 +#define MT6328_TOP_CKHWEN_CON0 0x0278 +#define MT6328_TOP_CKHWEN_CON0_SET 0x027a +#define MT6328_TOP_CKHWEN_CON0_CLR 0x027c +#define MT6328_TOP_CKHWEN_CON1 0x027e +#define MT6328_TOP_CKHWEN_CON1_SET 0x0280 +#define MT6328_TOP_CKHWEN_CON1_CLR 0x0282 +#define MT6328_TOP_CKTST_CON0 0x0284 +#define MT6328_TOP_CKTST_CON1 0x0286 +#define MT6328_TOP_CKTST_CON2 0x0288 +#define MT6328_TOP_CLKSQ 0x028a +#define MT6328_TOP_CLKSQ_SET 0x028c +#define MT6328_TOP_CLKSQ_CLR 0x028e +#define MT6328_TOP_CLKSQ_RTC 0x0290 +#define MT6328_TOP_CLKSQ_RTC_SET 0x0292 +#define MT6328_TOP_CLKSQ_RTC_CLR 0x0294 +#define MT6328_TOP_CLK_TRIM 0x0296 +#define MT6328_TOP_RST_CON0 0x0298 +#define MT6328_TOP_RST_CON0_SET 0x029a +#define MT6328_TOP_RST_CON0_CLR 0x029c +#define MT6328_TOP_RST_CON1 0x029e +#define MT6328_TOP_RST_MISC 0x02a0 +#define MT6328_TOP_RST_MISC_SET 0x02a2 +#define MT6328_TOP_RST_MISC_CLR 0x02a4 +#define MT6328_TOP_RST_STATUS 0x02a6 +#define MT6328_TOP_RST_STATUS_SET 0x02a8 +#define MT6328_TOP_RST_STATUS_CLR 0x02aa +#define MT6328_INT_CON0 0x02ac +#define MT6328_INT_CON0_SET 0x02ae +#define MT6328_INT_CON0_CLR 0x02b0 +#define MT6328_INT_CON1 0x02b2 +#define MT6328_INT_CON1_SET 0x02b4 +#define MT6328_INT_CON1_CLR 0x02b6 +#define MT6328_INT_CON2 0x02b8 +#define MT6328_INT_CON2_SET 0x02ba +#define MT6328_INT_CON2_CLR 0x02bc +#define MT6328_INT_MISC_CON 0x02be +#define MT6328_INT_MISC_CON_SET 0x02c0 +#define MT6328_INT_MISC_CON_CLR 0x02c2 +#define MT6328_INT_STATUS0 0x02c4 +#define MT6328_INT_STATUS1 0x02c6 +#define MT6328_INT_STATUS2 0x02c8 +#define MT6328_OC_GEAR_0 0x02ca +#define MT6328_FQMTR_CON0 0x02cc +#define MT6328_FQMTR_CON1 0x02ce +#define MT6328_FQMTR_CON2 0x02d0 +#define MT6328_RG_SPI_CON 0x02d2 +#define MT6328_DEW_DIO_EN 0x02d4 +#define MT6328_DEW_READ_TEST 0x02d6 +#define MT6328_DEW_WRITE_TEST 0x02d8 +#define MT6328_DEW_CRC_SWRST 0x02da +#define MT6328_DEW_CRC_EN 0x02dc +#define MT6328_DEW_CRC_VAL 0x02de +#define MT6328_DEW_DBG_MON_SEL 0x02e0 +#define MT6328_DEW_CIPHER_KEY_SEL 0x02e2 +#define MT6328_DEW_CIPHER_IV_SEL 0x02e4 +#define MT6328_DEW_CIPHER_EN 0x02e6 +#define MT6328_DEW_CIPHER_RDY 0x02e8 +#define MT6328_DEW_CIPHER_MODE 0x02ea +#define MT6328_DEW_CIPHER_SWRST 0x02ec +#define MT6328_DEW_RDDMY_NO 0x02ee +#define MT6328_INT_TYPE_CON0 0x02f0 +#define MT6328_INT_TYPE_CON0_SET 0x02f2 +#define MT6328_INT_TYPE_CON0_CLR 0x02f4 +#define MT6328_INT_TYPE_CON1 0x02f6 +#define MT6328_INT_TYPE_CON1_SET 0x02f8 +#define MT6328_INT_TYPE_CON1_CLR 0x02fa +#define MT6328_INT_TYPE_CON2 0x02fc +#define MT6328_INT_TYPE_CON2_SET 0x02fe +#define MT6328_INT_TYPE_CON2_CLR 0x0300 +#define MT6328_INT_STA 0x0302 +#define MT6328_BUCK_ALL_CON0 0x0400 +#define MT6328_BUCK_ALL_CON1 0x0402 +#define MT6328_BUCK_ALL_CON2 0x0404 +#define MT6328_BUCK_ALL_CON3 0x0406 +#define MT6328_BUCK_ALL_CON4 0x0408 +#define MT6328_BUCK_ALL_CON5 0x040a +#define MT6328_BUCK_ALL_CON6 0x040c +#define MT6328_BUCK_ALL_CON9 0x040e +#define MT6328_BUCK_ALL_CON12 0x0410 +#define MT6328_BUCK_ALL_CON13 0x0412 +#define MT6328_BUCK_ALL_CON14 0x0414 +#define MT6328_BUCK_ALL_CON16 0x0416 +#define MT6328_BUCK_ALL_CON18 0x0418 +#define MT6328_BUCK_ALL_CON19 0x041a +#define MT6328_BUCK_ALL_CON20 0x041c +#define MT6328_BUCK_ALL_CON21 0x041e +#define MT6328_BUCK_ALL_CON22 0x0420 +#define MT6328_BUCK_ALL_CON23 0x0422 +#define MT6328_BUCK_ALL_CON24 0x0424 +#define MT6328_BUCK_ALL_CON25 0x0426 +#define MT6328_BUCK_ALL_CON26 0x0428 +#define MT6328_BUCK_ALL_CON27 0x042a +#define MT6328_BUCK_ALL_CON28 0x042c +#define MT6328_SMPS_TOP_ANA_CON0 0x042e +#define MT6328_SMPS_TOP_ANA_CON1 0x0430 +#define MT6328_SMPS_TOP_ANA_CON2 0x0432 +#define MT6328_SMPS_TOP_ANA_CON3 0x0434 +#define MT6328_SMPS_TOP_ANA_CON4 0x0436 +#define MT6328_SMPS_TOP_ANA_CON5 0x0438 +#define MT6328_SMPS_TOP_ANA_CON6 0x043a +#define MT6328_SMPS_TOP_ANA_CON7 0x043c +#define MT6328_SMPS_TOP_ANA_CON8 0x043e +#define MT6328_VCORE_ANA_CON0 0x0440 +#define MT6328_VCORE_ANA_CON1 0x0442 +#define MT6328_VCORE_ANA_CON2 0x0444 +#define MT6328_VCORE_ANA_CON3 0x0446 +#define MT6328_VCORE_ANA_CON4 0x0448 +#define MT6328_VSYS22_ANA_CON0 0x044a +#define MT6328_VSYS22_ANA_CON1 0x044c +#define MT6328_VSYS22_ANA_CON2 0x044e +#define MT6328_VSYS22_ANA_CON3 0x0450 +#define MT6328_VSYS22_ANA_CON4 0x0452 +#define MT6328_VPROC_ANA_CON0 0x0454 +#define MT6328_VPROC_ANA_CON1 0x0456 +#define MT6328_VPROC_ANA_CON2 0x0458 +#define MT6328_VPROC_ANA_CON3 0x045a +#define MT6328_VPROC_ANA_CON4 0x045c +#define MT6328_OSC32_ANA_CON0 0x045e +#define MT6328_OSC32_ANA_CON1 0x0460 +#define MT6328_VPA_ANA_CON0 0x0462 +#define MT6328_VPA_ANA_CON1 0x0464 +#define MT6328_VPA_ANA_CON2 0x0466 +#define MT6328_VPA_ANA_CON3 0x0468 +#define MT6328_VLTE_ANA_CON0 0x046a +#define MT6328_VLTE_ANA_CON1 0x046c +#define MT6328_VLTE_ANA_CON2 0x046e +#define MT6328_VLTE_ANA_CON3 0x0470 +#define MT6328_VLTE_ANA_CON4 0x0472 +#define MT6328_VPROC_CON0 0x0474 +#define MT6328_VPROC_CON1 0x0476 +#define MT6328_VPROC_CON2 0x0478 +#define MT6328_VPROC_CON3 0x047a +#define MT6328_VPROC_CON4 0x047c +#define MT6328_VPROC_CON5 0x047e +#define MT6328_VPROC_CON6 0x0480 +#define MT6328_VPROC_CON7 0x0482 +#define MT6328_VPROC_CON8 0x0484 +#define MT6328_VPROC_CON9 0x0486 +#define MT6328_VPROC_CON10 0x0488 +#define MT6328_VPROC_CON11 0x048a +#define MT6328_VPROC_CON12 0x048c +#define MT6328_VPROC_CON13 0x048e +#define MT6328_VPROC_CON14 0x0490 +#define MT6328_VPROC_CON15 0x0492 +#define MT6328_VPROC_CON16 0x0494 +#define MT6328_VPROC_CON17 0x0496 +#define MT6328_VPROC_CON18 0x0498 +#define MT6328_VPROC_CON19 0x049a +#define MT6328_VSRAM_CON0 0x049c +#define MT6328_VSRAM_CON1 0x049e +#define MT6328_VSRAM_CON2 0x04a0 +#define MT6328_VSRAM_CON3 0x04a2 +#define MT6328_VSRAM_CON4 0x04a4 +#define MT6328_VSRAM_CON5 0x04a6 +#define MT6328_VSRAM_CON6 0x04a8 +#define MT6328_VSRAM_CON7 0x04aa +#define MT6328_VSRAM_CON8 0x04ac +#define MT6328_VSRAM_CON9 0x04ae +#define MT6328_VSRAM_CON10 0x04b0 +#define MT6328_VSRAM_CON11 0x04b2 +#define MT6328_VSRAM_CON12 0x04b4 +#define MT6328_VSRAM_CON13 0x04b6 +#define MT6328_VSRAM_CON14 0x04b8 +#define MT6328_VSRAM_CON15 0x04ba +#define MT6328_VSRAM_CON16 0x04bc +#define MT6328_VSRAM_CON17 0x04be +#define MT6328_VSRAM_CON18 0x04c0 +#define MT6328_VSRAM_CON19 0x04c2 +#define MT6328_VLTE_CON0 0x04c4 +#define MT6328_VLTE_CON1 0x04c6 +#define MT6328_VLTE_CON2 0x04c8 +#define MT6328_VLTE_CON3 0x04ca +#define MT6328_VLTE_CON4 0x04cc +#define MT6328_VLTE_CON5 0x04ce +#define MT6328_VLTE_CON6 0x04d0 +#define MT6328_VLTE_CON7 0x04d2 +#define MT6328_VLTE_CON8 0x04d4 +#define MT6328_VLTE_CON9 0x04d6 +#define MT6328_VLTE_CON10 0x04d8 +#define MT6328_VLTE_CON11 0x04da +#define MT6328_VLTE_CON12 0x04dc +#define MT6328_VLTE_CON13 0x04de +#define MT6328_VLTE_CON14 0x04e0 +#define MT6328_VLTE_CON15 0x04e2 +#define MT6328_VLTE_CON16 0x04e4 +#define MT6328_VLTE_CON17 0x04e6 +#define MT6328_VLTE_CON18 0x04e8 +#define MT6328_VLTE_CON19 0x04ea +#define MT6328_VCORE1_CON0 0x0600 +#define MT6328_VCORE1_CON1 0x0602 +#define MT6328_VCORE1_CON2 0x0604 +#define MT6328_VCORE1_CON3 0x0606 +#define MT6328_VCORE1_CON4 0x0608 +#define MT6328_VCORE1_CON5 0x060a +#define MT6328_VCORE1_CON6 0x060c +#define MT6328_VCORE1_CON7 0x060e +#define MT6328_VCORE1_CON8 0x0610 +#define MT6328_VCORE1_CON9 0x0612 +#define MT6328_VCORE1_CON10 0x0614 +#define MT6328_VCORE1_CON11 0x0616 +#define MT6328_VCORE1_CON12 0x0618 +#define MT6328_VCORE1_CON13 0x061a +#define MT6328_VCORE1_CON14 0x061c +#define MT6328_VCORE1_CON15 0x061e +#define MT6328_VCORE1_CON16 0x0620 +#define MT6328_VCORE1_CON17 0x0622 +#define MT6328_VCORE1_CON18 0x0624 +#define MT6328_VCORE1_CON19 0x0626 +#define MT6328_VSYS22_CON0 0x0628 +#define MT6328_VSYS22_CON1 0x062a +#define MT6328_VSYS22_CON2 0x062c +#define MT6328_VSYS22_CON3 0x062e +#define MT6328_VSYS22_CON4 0x0630 +#define MT6328_VSYS22_CON5 0x0632 +#define MT6328_VSYS22_CON6 0x0634 +#define MT6328_VSYS22_CON7 0x0636 +#define MT6328_VSYS22_CON8 0x0638 +#define MT6328_VSYS22_CON9 0x063a +#define MT6328_VSYS22_CON10 0x063c +#define MT6328_VSYS22_CON11 0x063e +#define MT6328_VSYS22_CON12 0x0640 +#define MT6328_VSYS22_CON13 0x0642 +#define MT6328_VSYS22_CON14 0x0644 +#define MT6328_VSYS22_CON15 0x0646 +#define MT6328_VSYS22_CON16 0x0648 +#define MT6328_VSYS22_CON17 0x064a +#define MT6328_VSYS22_CON18 0x064c +#define MT6328_VSYS22_CON19 0x064e +#define MT6328_VPA_CON0 0x0650 +#define MT6328_VPA_CON1 0x0652 +#define MT6328_VPA_CON2 0x0654 +#define MT6328_VPA_CON3 0x0656 +#define MT6328_VPA_CON4 0x0658 +#define MT6328_VPA_CON5 0x065a +#define MT6328_VPA_CON6 0x065c +#define MT6328_VPA_CON7 0x065e +#define MT6328_VPA_CON8 0x0660 +#define MT6328_VPA_CON9 0x0662 +#define MT6328_VPA_CON10 0x0664 +#define MT6328_VPA_CON11 0x0666 +#define MT6328_VPA_CON12 0x0668 +#define MT6328_VPA_CON13 0x066a +#define MT6328_VPA_CON14 0x066c +#define MT6328_VPA_CON15 0x066e +#define MT6328_VPA_CON16 0x0670 +#define MT6328_VPA_CON17 0x0672 +#define MT6328_VPA_CON18 0x0674 +#define MT6328_VPA_CON19 0x0676 +#define MT6328_VPA_CON20 0x0678 +#define MT6328_VPA_CON21 0x067a +#define MT6328_VPA_CON22 0x067c +#define MT6328_VPA_CON23 0x067e +#define MT6328_VPA_CON24 0x0680 +#define MT6328_BUCK_K_CON0 0x0682 +#define MT6328_BUCK_K_CON1 0x0684 +#define MT6328_BUCK_K_CON2 0x0686 +#define MT6328_BUCK_K_CON3 0x0688 +#define MT6328_ZCD_CON0 0x0800 +#define MT6328_ZCD_CON1 0x0802 +#define MT6328_ZCD_CON2 0x0804 +#define MT6328_ZCD_CON3 0x0806 +#define MT6328_ZCD_CON4 0x0808 +#define MT6328_ZCD_CON5 0x080a +#define MT6328_ISINK0_CON0 0x080c +#define MT6328_ISINK0_CON1 0x080e +#define MT6328_ISINK0_CON2 0x0810 +#define MT6328_ISINK0_CON3 0x0812 +#define MT6328_ISINK1_CON0 0x0814 +#define MT6328_ISINK1_CON1 0x0816 +#define MT6328_ISINK1_CON2 0x0818 +#define MT6328_ISINK1_CON3 0x081a +#define MT6328_ISINK2_CON1 0x081c +#define MT6328_ISINK3_CON1 0x081e +#define MT6328_ISINK_ANA0 0x0820 +#define MT6328_ISINK_ANA1 0x0822 +#define MT6328_ISINK_PHASE_DLY 0x0824 +#define MT6328_ISINK_SFSTR 0x0826 +#define MT6328_ISINK_EN_CTRL 0x0828 +#define MT6328_ISINK_MODE_CTRL 0x082a +#define MT6328_VTCXO_0_CON0 0x0a00 +#define MT6328_VTCXO_1_CON0 0x0a02 +#define MT6328_VAUD28_CON0 0x0a04 +#define MT6328_VAUX18_CON0 0x0a06 +#define MT6328_VRF18_0_CON0 0x0a08 +#define MT6328_VRF18_0_CON1 0x0a0a +#define MT6328_VCAMA_CON0 0x0a0c +#define MT6328_VCN28_CON0 0x0a0e +#define MT6328_VCN33_CON0 0x0a10 +#define MT6328_VCN33_CON1 0x0a12 +#define MT6328_VCN33_CON2 0x0a14 +#define MT6328_VRF18_1_CON0 0x0a16 +#define MT6328_VRF18_1_CON1 0x0a18 +#define MT6328_VUSB33_CON0 0x0a1a +#define MT6328_VMCH_CON0 0x0a1c +#define MT6328_VMCH_CON1 0x0a1e +#define MT6328_VMC_CON0 0x0a20 +#define MT6328_VMC_CON1 0x0a22 +#define MT6328_VEMC_3V3_CON0 0x0a24 +#define MT6328_VEMC_3V3_CON1 0x0a26 +#define MT6328_VIO28_CON0 0x0a28 +#define MT6328_VCAMAF_CON0 0x0a2a +#define MT6328_VGP1_CON0 0x0a2c +#define MT6328_VGP1_CON1 0x0a2e +#define MT6328_VEFUSE_CON0 0x0a30 +#define MT6328_VSIM1_CON0 0x0a32 +#define MT6328_VSIM2_CON0 0x0a34 +#define MT6328_VIO18_CON0 0x0a36 +#define MT6328_VIBR_CON0 0x0a38 +#define MT6328_VCN18_CON0 0x0a3a +#define MT6328_VCAM_CON0 0x0a3c +#define MT6328_VCAMIO_CON0 0x0a3e +#define MT6328_LDO_VSRAM_CON0 0x0a40 +#define MT6328_LDO_VSRAM_CON1 0x0a42 +#define MT6328_VTREF_CON0 0x0a44 +#define MT6328_VM_CON0 0x0a46 +#define MT6328_VM_CON1 0x0a48 +#define MT6328_VRTC_CON0 0x0a4a +#define MT6328_LDO_OCFB0 0x0a4c +#define MT6328_ALDO_ANA_CON0 0x0a4e +#define MT6328_ADLDO_ANA_CON1 0x0a50 +#define MT6328_ADLDO_ANA_CON2 0x0a52 +#define MT6328_ADLDO_ANA_CON3 0x0a54 +#define MT6328_ADLDO_ANA_CON4 0x0a56 +#define MT6328_ADLDO_ANA_CON5 0x0a58 +#define MT6328_ADLDO_ANA_CON6 0x0a5a +#define MT6328_ADLDO_ANA_CON7 0x0a5c +#define MT6328_ADLDO_ANA_CON8 0x0a5e +#define MT6328_ADLDO_ANA_CON9 0x0a60 +#define MT6328_ADLDO_ANA_CON10 0x0a62 +#define MT6328_ADLDO_ANA_CON11 0x0a64 +#define MT6328_ADLDO_ANA_CON12 0x0a66 +#define MT6328_ADLDO_ANA_CON13 0x0a68 +#define MT6328_DLDO_ANA_CON0 0x0a6a +#define MT6328_DLDO_ANA_CON1 0x0a6c +#define MT6328_DLDO_ANA_CON2 0x0a6e +#define MT6328_DLDO_ANA_CON3 0x0a70 +#define MT6328_DLDO_ANA_CON4 0x0a72 +#define MT6328_DLDO_ANA_CON5 0x0a74 +#define MT6328_SLDO_ANA_CON0 0x0a76 +#define MT6328_SLDO_ANA_CON1 0x0a78 +#define MT6328_SLDO_ANA_CON2 0x0a7a +#define MT6328_SLDO_ANA_CON3 0x0a7c +#define MT6328_SLDO_ANA_CON4 0x0a7e +#define MT6328_SLDO_ANA_CON5 0x0a80 +#define MT6328_SLDO_ANA_CON6 0x0a82 +#define MT6328_SLDO_ANA_CON7 0x0a84 +#define MT6328_SLDO_ANA_CON8 0x0a86 +#define MT6328_SLDO_ANA_CON9 0x0a88 +#define MT6328_SLDO_ANA_CON10 0x0a8a +#define MT6328_LDO_RSV_CON0 0x0a8c +#define MT6328_LDO_RSV_CON1 0x0a8e +#define MT6328_SPK_CON0 0x0a90 +#define MT6328_SPK_CON1 0x0a92 +#define MT6328_SPK_CON2 0x0a94 +#define MT6328_SPK_CON3 0x0a96 +#define MT6328_SPK_CON4 0x0a98 +#define MT6328_SPK_CON5 0x0a9a +#define MT6328_SPK_CON6 0x0a9c +#define MT6328_SPK_CON7 0x0a9e +#define MT6328_SPK_CON8 0x0aa0 +#define MT6328_SPK_CON9 0x0aa2 +#define MT6328_SPK_CON10 0x0aa4 +#define MT6328_SPK_CON11 0x0aa6 +#define MT6328_SPK_CON12 0x0aa8 +#define MT6328_SPK_CON13 0x0aaa +#define MT6328_SPK_CON14 0x0aac +#define MT6328_SPK_CON15 0x0aae +#define MT6328_SPK_CON16 0x0ab0 +#define MT6328_SPK_ANA_CON0 0x0ab2 +#define MT6328_SPK_ANA_CON1 0x0ab4 +#define MT6328_SPK_ANA_CON3 0x0ab6 +#define MT6328_OTP_CON0 0x0c00 +#define MT6328_OTP_CON1 0x0c02 +#define MT6328_OTP_CON2 0x0c04 +#define MT6328_OTP_CON3 0x0c06 +#define MT6328_OTP_CON4 0x0c08 +#define MT6328_OTP_CON5 0x0c0a +#define MT6328_OTP_CON6 0x0c0c +#define MT6328_OTP_CON7 0x0c0e +#define MT6328_OTP_CON8 0x0c10 +#define MT6328_OTP_CON9 0x0c12 +#define MT6328_OTP_CON10 0x0c14 +#define MT6328_OTP_CON11 0x0c16 +#define MT6328_OTP_CON12 0x0c18 +#define MT6328_OTP_CON13 0x0c1a +#define MT6328_OTP_CON14 0x0c1c +#define MT6328_OTP_DOUT_0_15 0x0c1e +#define MT6328_OTP_DOUT_16_31 0x0c20 +#define MT6328_OTP_DOUT_32_47 0x0c22 +#define MT6328_OTP_DOUT_48_63 0x0c24 +#define MT6328_OTP_DOUT_64_79 0x0c26 +#define MT6328_OTP_DOUT_80_95 0x0c28 +#define MT6328_OTP_DOUT_96_111 0x0c2a +#define MT6328_OTP_DOUT_112_127 0x0c2c +#define MT6328_OTP_DOUT_128_143 0x0c2e +#define MT6328_OTP_DOUT_144_159 0x0c30 +#define MT6328_OTP_DOUT_160_175 0x0c32 +#define MT6328_OTP_DOUT_176_191 0x0c34 +#define MT6328_OTP_DOUT_192_207 0x0c36 +#define MT6328_OTP_DOUT_208_223 0x0c38 +#define MT6328_OTP_DOUT_224_239 0x0c3a +#define MT6328_OTP_DOUT_240_255 0x0c3c +#define MT6328_OTP_DOUT_256_271 0x0c3e +#define MT6328_OTP_DOUT_272_287 0x0c40 +#define MT6328_OTP_DOUT_288_303 0x0c42 +#define MT6328_OTP_DOUT_304_319 0x0c44 +#define MT6328_OTP_DOUT_320_335 0x0c46 +#define MT6328_OTP_DOUT_336_351 0x0c48 +#define MT6328_OTP_DOUT_352_367 0x0c4a +#define MT6328_OTP_DOUT_368_383 0x0c4c +#define MT6328_OTP_DOUT_384_399 0x0c4e +#define MT6328_OTP_DOUT_400_415 0x0c50 +#define MT6328_OTP_DOUT_416_431 0x0c52 +#define MT6328_OTP_DOUT_432_447 0x0c54 +#define MT6328_OTP_DOUT_448_463 0x0c56 +#define MT6328_OTP_DOUT_464_479 0x0c58 +#define MT6328_OTP_DOUT_480_495 0x0c5a +#define MT6328_OTP_DOUT_496_511 0x0c5c +#define MT6328_OTP_VAL_0_15 0x0c5e +#define MT6328_OTP_VAL_16_31 0x0c60 +#define MT6328_OTP_VAL_32_47 0x0c62 +#define MT6328_OTP_VAL_48_63 0x0c64 +#define MT6328_OTP_VAL_64_79 0x0c66 +#define MT6328_OTP_VAL_80_95 0x0c68 +#define MT6328_OTP_VAL_96_111 0x0c6a +#define MT6328_OTP_VAL_112_127 0x0c6c +#define MT6328_OTP_VAL_128_143 0x0c6e +#define MT6328_OTP_VAL_144_159 0x0c70 +#define MT6328_OTP_VAL_160_175 0x0c72 +#define MT6328_OTP_VAL_176_191 0x0c74 +#define MT6328_OTP_VAL_192_207 0x0c76 +#define MT6328_OTP_VAL_208_223 0x0c78 +#define MT6328_OTP_VAL_224_239 0x0c7a +#define MT6328_OTP_VAL_240_255 0x0c7c +#define MT6328_OTP_VAL_256_271 0x0c7e +#define MT6328_OTP_VAL_272_287 0x0c80 +#define MT6328_OTP_VAL_288_303 0x0c82 +#define MT6328_OTP_VAL_304_319 0x0c84 +#define MT6328_OTP_VAL_320_335 0x0c86 +#define MT6328_OTP_VAL_336_351 0x0c88 +#define MT6328_OTP_VAL_352_367 0x0c8a +#define MT6328_OTP_VAL_368_383 0x0c8c +#define MT6328_OTP_VAL_384_399 0x0c8e +#define MT6328_OTP_VAL_400_415 0x0c90 +#define MT6328_OTP_VAL_416_431 0x0c92 +#define MT6328_OTP_VAL_432_447 0x0c94 +#define MT6328_OTP_VAL_448_463 0x0c96 +#define MT6328_OTP_VAL_464_479 0x0c98 +#define MT6328_OTP_VAL_480_495 0x0c9a +#define MT6328_OTP_VAL_496_511 0x0c9c +#define MT6328_RTC_MIX_CON0 0x0c9e +#define MT6328_RTC_MIX_CON1 0x0ca0 +#define MT6328_RTC_MIX_CON2 0x0ca2 +#define MT6328_FGADC_CON0 0x0ca4 +#define MT6328_FGADC_CON1 0x0ca6 +#define MT6328_FGADC_CON2 0x0ca8 +#define MT6328_FGADC_CON3 0x0caa +#define MT6328_FGADC_CON4 0x0cac +#define MT6328_FGADC_CON5 0x0cae +#define MT6328_FGADC_CON6 0x0cb0 +#define MT6328_FGADC_CON7 0x0cb2 +#define MT6328_FGADC_CON8 0x0cb4 +#define MT6328_FGADC_CON9 0x0cb6 +#define MT6328_FGADC_CON10 0x0cb8 +#define MT6328_FGADC_CON11 0x0cba +#define MT6328_FGADC_CON12 0x0cbc +#define MT6328_FGADC_CON13 0x0cbe +#define MT6328_FGADC_CON14 0x0cc0 +#define MT6328_FGADC_CON15 0x0cc2 +#define MT6328_FGADC_CON16 0x0cc4 +#define MT6328_FGADC_CON17 0x0cc6 +#define MT6328_FGADC_CON18 0x0cc8 +#define MT6328_FGADC_CON19 0x0cca +#define MT6328_FGADC_CON20 0x0ccc +#define MT6328_FGADC_CON21 0x0cce +#define MT6328_FGADC_CON22 0x0cd0 +#define MT6328_FGADC_CON23 0x0cd2 +#define MT6328_FGADC_CON24 0x0cd4 +#define MT6328_FGADC_CON25 0x0cd6 +#define MT6328_FGADC_CON26 0x0cd8 +#define MT6328_FGADC_CON27 0x0cda +#define MT6328_AUDDEC_ANA_CON0 0x0cdc +#define MT6328_AUDDEC_ANA_CON1 0x0cde +#define MT6328_AUDDEC_ANA_CON2 0x0ce0 +#define MT6328_AUDDEC_ANA_CON3 0x0ce2 +#define MT6328_AUDDEC_ANA_CON4 0x0ce4 +#define MT6328_AUDDEC_ANA_CON5 0x0ce6 +#define MT6328_AUDDEC_ANA_CON6 0x0ce8 +#define MT6328_AUDDEC_ANA_CON7 0x0cea +#define MT6328_AUDDEC_ANA_CON8 0x0cec +#define MT6328_AUDENC_ANA_CON0 0x0cee +#define MT6328_AUDENC_ANA_CON1 0x0cf0 +#define MT6328_AUDENC_ANA_CON2 0x0cf2 +#define MT6328_AUDENC_ANA_CON3 0x0cf4 +#define MT6328_AUDENC_ANA_CON4 0x0cf6 +#define MT6328_AUDENC_ANA_CON5 0x0cf8 +#define MT6328_AUDENC_ANA_CON6 0x0cfa +#define MT6328_AUDENC_ANA_CON7 0x0cfc +#define MT6328_AUDENC_ANA_CON8 0x0cfe +#define MT6328_AUDENC_ANA_CON9 0x0d00 +#define MT6328_AUDENC_ANA_CON10 0x0d02 +#define MT6328_AUDNCP_CLKDIV_CON0 0x0d04 +#define MT6328_AUDNCP_CLKDIV_CON1 0x0d06 +#define MT6328_AUDNCP_CLKDIV_CON2 0x0d08 +#define MT6328_AUDNCP_CLKDIV_CON3 0x0d0a +#define MT6328_AUDNCP_CLKDIV_CON4 0x0d0c +#define MT6328_AUXADC_ADC0 0x0e00 +#define MT6328_AUXADC_ADC1 0x0e02 +#define MT6328_AUXADC_ADC2 0x0e04 +#define MT6328_AUXADC_ADC3 0x0e06 +#define MT6328_AUXADC_ADC4 0x0e08 +#define MT6328_AUXADC_ADC5 0x0e0a +#define MT6328_AUXADC_ADC6 0x0e0c +#define MT6328_AUXADC_ADC7 0x0e0e +#define MT6328_AUXADC_ADC8 0x0e10 +#define MT6328_AUXADC_ADC9 0x0e12 +#define MT6328_AUXADC_ADC10 0x0e14 +#define MT6328_AUXADC_ADC11 0x0e16 +#define MT6328_AUXADC_ADC12 0x0e18 +#define MT6328_AUXADC_ADC13 0x0e1a +#define MT6328_AUXADC_ADC14 0x0e1c +#define MT6328_AUXADC_ADC15 0x0e1e +#define MT6328_AUXADC_ADC16 0x0e20 +#define MT6328_AUXADC_ADC17 0x0e22 +#define MT6328_AUXADC_ADC18 0x0e24 +#define MT6328_AUXADC_ADC19 0x0e26 +#define MT6328_AUXADC_ADC20 0x0e28 +#define MT6328_AUXADC_ADC21 0x0e2a +#define MT6328_AUXADC_ADC22 0x0e2c +#define MT6328_AUXADC_ADC23 0x0e2e +#define MT6328_AUXADC_ADC24 0x0e30 +#define MT6328_AUXADC_ADC25 0x0e32 +#define MT6328_AUXADC_ADC26 0x0e34 +#define MT6328_AUXADC_ADC27 0x0e36 +#define MT6328_AUXADC_ADC28 0x0e38 +#define MT6328_AUXADC_ADC29 0x0e3a +#define MT6328_AUXADC_ADC30 0x0e3c +#define MT6328_AUXADC_ADC31 0x0e3e +#define MT6328_AUXADC_ADC32 0x0e40 +#define MT6328_AUXADC_ADC33 0x0e42 +#define MT6328_AUXADC_BUF0 0x0e44 +#define MT6328_AUXADC_BUF1 0x0e46 +#define MT6328_AUXADC_BUF2 0x0e48 +#define MT6328_AUXADC_BUF3 0x0e4a +#define MT6328_AUXADC_BUF4 0x0e4c +#define MT6328_AUXADC_BUF5 0x0e4e +#define MT6328_AUXADC_BUF6 0x0e50 +#define MT6328_AUXADC_BUF7 0x0e52 +#define MT6328_AUXADC_BUF8 0x0e54 +#define MT6328_AUXADC_BUF9 0x0e56 +#define MT6328_AUXADC_BUF10 0x0e58 +#define MT6328_AUXADC_BUF11 0x0e5a +#define MT6328_AUXADC_BUF12 0x0e5c +#define MT6328_AUXADC_BUF13 0x0e5e +#define MT6328_AUXADC_BUF14 0x0e60 +#define MT6328_AUXADC_BUF15 0x0e62 +#define MT6328_AUXADC_BUF16 0x0e64 +#define MT6328_AUXADC_BUF17 0x0e66 +#define MT6328_AUXADC_BUF18 0x0e68 +#define MT6328_AUXADC_BUF19 0x0e6a +#define MT6328_AUXADC_BUF20 0x0e6c +#define MT6328_AUXADC_BUF21 0x0e6e +#define MT6328_AUXADC_BUF22 0x0e70 +#define MT6328_AUXADC_BUF23 0x0e72 +#define MT6328_AUXADC_BUF24 0x0e74 +#define MT6328_AUXADC_BUF25 0x0e76 +#define MT6328_AUXADC_BUF26 0x0e78 +#define MT6328_AUXADC_BUF27 0x0e7a +#define MT6328_AUXADC_BUF28 0x0e7c +#define MT6328_AUXADC_BUF29 0x0e7e +#define MT6328_AUXADC_BUF30 0x0e80 +#define MT6328_AUXADC_BUF31 0x0e82 +#define MT6328_AUXADC_STA0 0x0e84 +#define MT6328_AUXADC_STA1 0x0e86 +#define MT6328_AUXADC_RQST0 0x0e88 +#define MT6328_AUXADC_RQST0_SET 0x0e8a +#define MT6328_AUXADC_RQST0_CLR 0x0e8c +#define MT6328_AUXADC_RQST1 0x0e8e +#define MT6328_AUXADC_RQST1_SET 0x0e90 +#define MT6328_AUXADC_RQST1_CLR 0x0e92 +#define MT6328_AUXADC_CON0 0x0e94 +#define MT6328_AUXADC_CON0_SET 0x0e96 +#define MT6328_AUXADC_CON0_CLR 0x0e98 +#define MT6328_AUXADC_CON1 0x0e9a +#define MT6328_AUXADC_CON2 0x0e9c +#define MT6328_AUXADC_CON3 0x0e9e +#define MT6328_AUXADC_CON4 0x0ea0 +#define MT6328_AUXADC_CON5 0x0ea2 +#define MT6328_AUXADC_CON6 0x0ea4 +#define MT6328_AUXADC_CON7 0x0ea6 +#define MT6328_AUXADC_CON8 0x0ea8 +#define MT6328_AUXADC_CON9 0x0eaa +#define MT6328_AUXADC_CON10 0x0eac +#define MT6328_AUXADC_CON11 0x0eae +#define MT6328_AUXADC_CON12 0x0eb0 +#define MT6328_AUXADC_CON13 0x0eb2 +#define MT6328_AUXADC_CON14 0x0eb4 +#define MT6328_AUXADC_CON15 0x0eb6 +#define MT6328_AUXADC_CON16 0x0eb8 +#define MT6328_AUXADC_AUTORPT0 0x0eba +#define MT6328_AUXADC_LBAT0 0x0ebc +#define MT6328_AUXADC_LBAT1 0x0ebe +#define MT6328_AUXADC_LBAT2 0x0ec0 +#define MT6328_AUXADC_LBAT3 0x0ec2 +#define MT6328_AUXADC_LBAT4 0x0ec4 +#define MT6328_AUXADC_LBAT5 0x0ec6 +#define MT6328_AUXADC_LBAT6 0x0ec8 +#define MT6328_AUXADC_ACCDET 0x0eca +#define MT6328_AUXADC_THR0 0x0ecc +#define MT6328_AUXADC_THR1 0x0ece +#define MT6328_AUXADC_THR2 0x0ed0 +#define MT6328_AUXADC_THR3 0x0ed2 +#define MT6328_AUXADC_THR4 0x0ed4 +#define MT6328_AUXADC_THR5 0x0ed6 +#define MT6328_AUXADC_THR6 0x0ed8 +#define MT6328_AUXADC_EFUSE0 0x0eda +#define MT6328_AUXADC_EFUSE1 0x0edc +#define MT6328_AUXADC_EFUSE2 0x0ede +#define MT6328_AUXADC_EFUSE3 0x0ee0 +#define MT6328_AUXADC_EFUSE4 0x0ee2 +#define MT6328_AUXADC_EFUSE5 0x0ee4 +#define MT6328_AUXADC_DBG0 0x0ee6 +#define MT6328_AUXADC_IMP0 0x0ee8 +#define MT6328_AUXADC_IMP1 0x0eea +#define MT6328_AUXADC_VISMPS0_1 0x0eec +#define MT6328_AUXADC_VISMPS0_2 0x0eee +#define MT6328_AUXADC_VISMPS0_3 0x0ef0 +#define MT6328_AUXADC_VISMPS0_4 0x0ef2 +#define MT6328_AUXADC_VISMPS0_5 0x0ef4 +#define MT6328_AUXADC_VISMPS0_6 0x0ef6 +#define MT6328_AUXADC_VISMPS0_7 0x0ef8 +#define MT6328_AUXADC_LBAT2_1 0x0efa +#define MT6328_AUXADC_LBAT2_2 0x0efc +#define MT6328_AUXADC_LBAT2_3 0x0efe +#define MT6328_AUXADC_LBAT2_4 0x0f00 +#define MT6328_AUXADC_LBAT2_5 0x0f02 +#define MT6328_AUXADC_LBAT2_6 0x0f04 +#define MT6328_AUXADC_LBAT2_7 0x0f06 +#define MT6328_AUXADC_MDBG_0 0x0f08 +#define MT6328_AUXADC_MDBG_1 0x0f0a +#define MT6328_AUXADC_MDBG_2 0x0f0c +#define MT6328_AUXADC_MDRT_0 0x0f0e +#define MT6328_AUXADC_MDRT_1 0x0f10 +#define MT6328_AUXADC_MDRT_2 0x0f12 +#define MT6328_ACCDET_CON0 0x0f14 +#define MT6328_ACCDET_CON1 0x0f16 +#define MT6328_ACCDET_CON2 0x0f18 +#define MT6328_ACCDET_CON3 0x0f1a +#define MT6328_ACCDET_CON4 0x0f1c +#define MT6328_ACCDET_CON5 0x0f1e +#define MT6328_ACCDET_CON6 0x0f20 +#define MT6328_ACCDET_CON7 0x0f22 +#define MT6328_ACCDET_CON8 0x0f24 +#define MT6328_ACCDET_CON9 0x0f26 +#define MT6328_ACCDET_CON10 0x0f28 +#define MT6328_ACCDET_CON11 0x0f2a +#define MT6328_ACCDET_CON12 0x0f2c +#define MT6328_ACCDET_CON13 0x0f2e +#define MT6328_ACCDET_CON14 0x0f30 +#define MT6328_ACCDET_CON15 0x0f32 +#define MT6328_ACCDET_CON16 0x0f34 +#define MT6328_ACCDET_CON17 0x0f36 +#define MT6328_ACCDET_CON18 0x0f38 +#define MT6328_ACCDET_CON19 0x0f3a +#define MT6328_ACCDET_CON20 0x0f3c +#define MT6328_ACCDET_CON21 0x0f3e +#define MT6328_ACCDET_CON22 0x0f40 +#define MT6328_ACCDET_CON23 0x0f42 +#define MT6328_ACCDET_CON24 0x0f44 +#define MT6328_ACCDET_CON25 0x0f46 +#define MT6328_CHR_CON0 0x0f48 +#define MT6328_CHR_CON1 0x0f4a +#define MT6328_CHR_CON2 0x0f4c +#define MT6328_CHR_CON3 0x0f4e +#define MT6328_CHR_CON4 0x0f50 +#define MT6328_CHR_CON5 0x0f52 +#define MT6328_CHR_CON6 0x0f54 +#define MT6328_CHR_CON7 0x0f56 +#define MT6328_CHR_CON8 0x0f58 +#define MT6328_CHR_CON9 0x0f5a +#define MT6328_CHR_CON10 0x0f5c +#define MT6328_CHR_CON11 0x0f5e +#define MT6328_CHR_CON12 0x0f60 +#define MT6328_CHR_CON13 0x0f62 +#define MT6328_CHR_CON14 0x0f64 +#define MT6328_CHR_CON15 0x0f66 +#define MT6328_CHR_CON16 0x0f68 +#define MT6328_CHR_CON17 0x0f6a +#define MT6328_CHR_CON18 0x0f6c +#define MT6328_CHR_CON19 0x0f6e +#define MT6328_CHR_CON20 0x0f70 +#define MT6328_CHR_CON21 0x0f72 +#define MT6328_CHR_CON22 0x0f74 +#define MT6328_CHR_CON23 0x0f76 +#define MT6328_CHR_CON24 0x0f78 +#define MT6328_CHR_CON25 0x0f7a +#define MT6328_CHR_CON26 0x0f7c +#define MT6328_CHR_CON27 0x0f7e +#define MT6328_CHR_CON28 0x0f80 +#define MT6328_CHR_CON29 0x0f82 +#define MT6328_CHR_CON30 0x0f84 +#define MT6328_CHR_CON31 0x0f86 +#define MT6328_CHR_CON32 0x0f88 +#define MT6328_CHR_CON33 0x0f8a +#define MT6328_CHR_CON34 0x0f8c +#define MT6328_CHR_CON35 0x0f8e +#define MT6328_CHR_CON36 0x0f90 +#define MT6328_CHR_CON37 0x0f92 +#define MT6328_CHR_CON38 0x0f94 +#define MT6328_CHR_CON39 0x0f96 +#define MT6328_CHR_CON40 0x0f98 +#define MT6328_CHR_CON41 0x0f9a +#define MT6328_CHR_CON42 0x0f9c +#define MT6328_BATON_CON0 0x0f9e +#define MT6328_CHR_CON43 0x0fa0 +#define MT6328_EOSC_CALI_CON0 0x0faa +#define MT6328_EOSC_CALI_CON1 0x0fac +#define MT6328_VRTC_PWM_CON0 0x0fae + +#endif /* __MFD_MT6328_REGISTERS_H__ */ diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h index 627487e26287..b774c3a4bb62 100644 --- a/include/linux/mfd/mt6397/core.h +++ b/include/linux/mfd/mt6397/core.h @@ -12,6 +12,7 @@ enum chip_id { MT6323_CHIP_ID = 0x23, + MT6328_CHIP_ID = 0x30, MT6331_CHIP_ID = 0x20, MT6332_CHIP_ID = 0x20, MT6357_CHIP_ID = 0x57, @@ -65,11 +66,11 @@ struct mt6397_chip { int irq; struct irq_domain *irq_domain; struct mutex irqlock; - u16 wake_mask[2]; - u16 irq_masks_cur[2]; - u16 irq_masks_cache[2]; - u16 int_con[2]; - u16 int_status[2]; + u16 wake_mask[3]; + u16 irq_masks_cur[3]; + u16 irq_masks_cache[3]; + u16 int_con[3]; + u16 int_status[3]; u16 chip_id; void *irq_data; }; diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index eda1ffd99c1a..dabcc0dea802 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -98,8 +98,8 @@ struct palmas_sleep_requestor_info { }; struct palmas_regs_info { - char *name; - char *sname; + const char *name; + const char *sname; u8 vsel_addr; u8 ctrl_addr; u8 tstep_addr; diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index a212b9f72bc9..750274d41fc0 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -37,6 +37,7 @@ struct gpio_desc; enum sec_device_type { S5M8767X, + S2DOS05, S2MPA01, S2MPS11X, S2MPS13X, diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 644be30b69c8..002e49b2ebd9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -70,6 +70,7 @@ int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free, unsigned int *ret_succeeded); struct folio *alloc_migration_target(struct folio *src, unsigned long private); bool isolate_movable_page(struct page *page, isolate_mode_t mode); +bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -91,6 +92,8 @@ static inline struct folio *alloc_migration_target(struct folio *src, { return NULL; } static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) { return false; } +static inline bool isolate_folio_to_list(struct folio *folio, struct list_head *list) + { return false; } static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) diff --git a/include/linux/mii.h b/include/linux/mii.h index d5a959ce4877..b8f26d4513c3 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -140,7 +140,7 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv) * settings to phy autonegotiation advertisements for the * MII_ADVERTISE register. */ -static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising) +static inline u32 linkmode_adv_to_mii_adv_t(const unsigned long *advertising) { u32 result = 0; @@ -215,7 +215,8 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv) * settings to phy autonegotiation advertisements for the * MII_CTRL1000 register when in 1000T mode. */ -static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising) +static inline u32 +linkmode_adv_to_mii_ctrl1000_t(const unsigned long *advertising) { u32 result = 0; @@ -453,7 +454,7 @@ static inline void mii_ctrl1000_mod_linkmode_adv_t(unsigned long *advertising, * A small helper function that translates linkmode advertising to LVL * pause capabilities. */ -static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising) +static inline u32 linkmode_adv_to_lcl_adv_t(const unsigned long *advertising) { u32 lcl_adv = 0; diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 43a7b9dcf15e..e781727c8916 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,9 +38,176 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs, void *args); }; +/** + * is_aligned - is this pointer & size okay for word-wide copying? + * @base: pointer to data + * @size: size of each element + * @align: required alignment (typically 4 or 8) + * + * Returns true if elements can be copied using word loads and stores. + * The size must be a multiple of the alignment, and the base address must + * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + * + * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" + * to "if ((a | b) & mask)", so we do that by hand. + */ +__attribute_const__ __always_inline +static bool is_aligned(const void *base, size_t size, unsigned char align) +{ + unsigned char lsbits = (unsigned char)size; + + (void)base; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + lsbits |= (unsigned char)(uintptr_t)base; +#endif + return (lsbits & (align - 1)) == 0; +} + +/** + * swap_words_32 - swap two elements in 32-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 4) + * + * Exchange the two objects in memory. This exploits base+index addressing, + * which basically all CPUs have, to minimize loop overhead computations. + * + * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the + * bottom of the loop, even though the zero flag is still valid from the + * subtract (since the intervening mov instructions don't alter the flags). + * Gcc 8.1.0 doesn't have that problem. + */ +static __always_inline +void swap_words_32(void *a, void *b, size_t n) +{ + do { + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + } while (n); +} + +/** + * swap_words_64 - swap two elements in 64-bit chunks + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size (must be a multiple of 8) + * + * Exchange the two objects in memory. This exploits base+index + * addressing, which basically all CPUs have, to minimize loop overhead + * computations. + * + * We'd like to use 64-bit loads if possible. If they're not, emulating + * one requires base+index+4 addressing which x86 has but most other + * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, + * but it's possible to have 64-bit loads without 64-bit pointers (e.g. + * x32 ABI). Are there any cases the kernel needs to worry about? + */ +static __always_inline +void swap_words_64(void *a, void *b, size_t n) +{ + do { +#ifdef CONFIG_64BIT + u64 t = *(u64 *)(a + (n -= 8)); + *(u64 *)(a + n) = *(u64 *)(b + n); + *(u64 *)(b + n) = t; +#else + /* Use two 32-bit transfers to avoid base+index+4 addressing */ + u32 t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; + + t = *(u32 *)(a + (n -= 4)); + *(u32 *)(a + n) = *(u32 *)(b + n); + *(u32 *)(b + n) = t; +#endif + } while (n); +} + +/** + * swap_bytes - swap two elements a byte at a time + * @a: pointer to the first element to swap + * @b: pointer to the second element to swap + * @n: element size + * + * This is the fallback if alignment doesn't allow using larger chunks. + */ +static __always_inline +void swap_bytes(void *a, void *b, size_t n) +{ + do { + char t = ((char *)a)[--n]; + ((char *)a)[n] = ((char *)b)[n]; + ((char *)b)[n] = t; + } while (n); +} + +/* + * The values are arbitrary as long as they can't be confused with + * a pointer, but small integers make for the smallest compare + * instructions. + */ +#define SWAP_WORDS_64 ((void (*)(void *, void *, void *))0) +#define SWAP_WORDS_32 ((void (*)(void *, void *, void *))1) +#define SWAP_BYTES ((void (*)(void *, void *, void *))2) + +/* + * Selects the appropriate swap function based on the element size. + */ +static __always_inline +void *select_swap_func(const void *base, size_t size) +{ + if (is_aligned(base, size, 8)) + return SWAP_WORDS_64; + else if (is_aligned(base, size, 4)) + return SWAP_WORDS_32; + else + return SWAP_BYTES; +} + +static __always_inline +void do_swap(void *a, void *b, size_t size, void (*swap_func)(void *lhs, void *rhs, void *args), + void *priv) +{ + if (swap_func == SWAP_WORDS_64) + swap_words_64(a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32(a, b, size); + else if (swap_func == SWAP_BYTES) + swap_bytes(a, b, size); + else + swap_func(a, b, priv); +} + +/** + * parent - given the offset of the child, find the offset of the parent. + * @i: the offset of the heap element whose parent is sought. Non-zero. + * @lsbit: a precomputed 1-bit mask, equal to "size & -size" + * @size: size of each element + * + * In terms of array indexes, the parent of element j = @i/@size is simply + * (j-1)/2. But when working in byte offsets, we can't use implicit + * truncation of integer divides. + * + * Fortunately, we only need one bit of the quotient, not the full divide. + * @size has a least significant bit. That bit will be clear if @i is + * an even multiple of @size, and set if it's an odd multiple. + * + * Logically, we're doing "if (i & lsbit) i -= size;", but since the + * branch is unpredictable, it's done with a bit of clever branch-free + * code instead. + */ +__attribute_const__ __always_inline +static size_t parent(size_t i, unsigned int lsbit, size_t size) +{ + i -= size; + i -= size & -(i & lsbit); + return i / 2; +} + /* Initialize a min-heap. */ static __always_inline -void __min_heap_init(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, int size) { heap->nr = 0; heap->size = size; @@ -50,105 +217,114 @@ void __min_heap_init(min_heap_char *heap, void *data, int size) heap->data = heap->preallocated; } -#define min_heap_init(_heap, _data, _size) \ - __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_init_inline(_heap, _data, _size) \ + __min_heap_init_inline((min_heap_char *)_heap, _data, _size) /* Get the minimum element from the heap. */ static __always_inline -void *__min_heap_peek(struct min_heap_char *heap) +void *__min_heap_peek_inline(struct min_heap_char *heap) { return heap->nr ? heap->data : NULL; } -#define min_heap_peek(_heap) \ - (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_peek_inline(_heap) \ + (__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap)) /* Check if the heap is full. */ static __always_inline -bool __min_heap_full(min_heap_char *heap) +bool __min_heap_full_inline(min_heap_char *heap) { return heap->nr == heap->size; } -#define min_heap_full(_heap) \ - __min_heap_full((min_heap_char *)_heap) +#define min_heap_full_inline(_heap) \ + __min_heap_full_inline((min_heap_char *)_heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { - void *left, *right; + const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; - void *root = data + pos * elem_size; - int i = pos, j; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; + /* pre-scale counters for performance */ + size_t a = pos * elem_size; + size_t b, c, d; + size_t n = heap->nr * elem_size; + + if (!swp) + swp = select_swap_func(data, elem_size); /* Find the sift-down path all the way to the leaves. */ - for (;;) { - if (i * 2 + 2 >= heap->nr) - break; - left = data + (i * 2 + 1) * elem_size; - right = data + (i * 2 + 2) * elem_size; - i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2; - } + for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;) + b = func->less(data + c, data + d, args) ? c : d; /* Special case for the last leaf with no sibling. */ - if (i * 2 + 2 == heap->nr) - i = i * 2 + 1; + if (d == n) + b = c; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * elem_size, args)) - i = (i - 1) / 2; + while (b != a && func->less(data + a, data + b, args)) + b = parent(b, lsbit, elem_size); /* Shift the element into its correct place. */ - j = i; - while (i != pos) { - i = (i - 1) / 2; - func->swp(data + i * elem_size, data + j * elem_size, args); + c = b; + while (b != a) { + b = parent(b, lsbit, elem_size); + do_swap(data + b, data + c, elem_size, swp, args); } } -#define min_heap_sift_down(_heap, _pos, _func, _args) \ - __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_down_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \ + _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline -void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { + const unsigned long lsbit = elem_size & -elem_size; void *data = heap->data; - size_t parent; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; + /* pre-scale counters for performance */ + size_t a = idx * elem_size, b; + + if (!swp) + swp = select_swap_func(data, elem_size); - while (idx) { - parent = (idx - 1) / 2; - if (func->less(data + parent * elem_size, data + idx * elem_size, args)) + while (a) { + b = parent(a, lsbit, elem_size); + if (func->less(data + b, data + a, args)) break; - func->swp(data + parent * elem_size, data + idx * elem_size, args); - idx = parent; + do_swap(data + a, data + b, elem_size, swp, args); + a = b; } } -#define min_heap_sift_up(_heap, _idx, _func, _args) \ - __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heap_sift_up_inline(_heap, _idx, _func, _args) \ + __min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) /* Floyd's approach to heapification that is O(nr). */ static __always_inline -void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heap_sift_down(heap, i, elem_size, func, args); + __min_heap_sift_down_inline(heap, i, elem_size, func, args); } -#define min_heapify_all(_heap, _func, _args) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heapify_all_inline(_heap, _func, _args) \ + __min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -158,13 +334,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); return true; } -#define min_heap_pop(_heap, _func, _args) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_inline(_heap, _func, _args) \ + __min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -172,22 +348,21 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, * efficient than a pop followed by a push that does 2. */ static __always_inline -void __min_heap_pop_push(min_heap_char *heap, - const void *element, size_t elem_size, - const struct min_heap_callbacks *func, - void *args) +void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { memcpy(heap->data, element, elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); } -#define min_heap_pop_push(_heap, _element, _func, _args) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push_inline(_heap, _element, _func, _args) \ + __min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; int pos; @@ -201,35 +376,81 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, heap->nr++; /* Sift child at pos up. */ - __min_heap_sift_up(heap, elem_size, pos, func, args); + __min_heap_sift_up_inline(heap, elem_size, pos, func, args); return true; } -#define min_heap_push(_heap, _element, _func, _args) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_push_inline(_heap, _element, _func, _args) \ + __min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Remove ith element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; + void (*swp)(void *lhs, void *rhs, void *args) = func->swp; if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) return false; + if (!swp) + swp = select_swap_func(data, elem_size); + /* Place last element at the root (position 0) and then sift down. */ heap->nr--; if (idx == heap->nr) return true; - func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); - __min_heap_sift_up(heap, elem_size, idx, func, args); - __min_heap_sift_down(heap, idx, elem_size, func, args); + do_swap(data + (idx * elem_size), data + (heap->nr * elem_size), elem_size, swp, args); + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); + __min_heap_sift_down_inline(heap, idx, elem_size, func, args); return true; } +#define min_heap_del_inline(_heap, _idx, _func, _args) \ + __min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) + +void __min_heap_init(min_heap_char *heap, void *data, int size); +void *__min_heap_peek(struct min_heap_char *heap); +bool __min_heap_full(min_heap_char *heap); +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); + +#define min_heap_init(_heap, _data, _size) \ + __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_peek(_heap) \ + (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_full(_heap) \ + __min_heap_full((min_heap_char *)_heap) +#define min_heap_sift_down(_heap, _pos, _func, _args) \ + __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_up(_heap, _idx, _func, _args) \ + __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heapify_all(_heap, _func, _args) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop(_heap, _func, _args) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push(_heap, _element, _func, _args) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) +#define min_heap_push(_heap, _element, _func, _args) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h index 323b31a847c5..a81d6fa70851 100644 --- a/include/linux/misc/keba.h +++ b/include/linux/misc/keba.h @@ -7,6 +7,7 @@ #include <linux/auxiliary_bus.h> struct i2c_board_info; +struct spi_board_info; /** * struct keba_i2c_auxdev - KEBA I2C auxiliary device @@ -22,4 +23,50 @@ struct keba_i2c_auxdev { struct i2c_board_info *info; }; +/** + * struct keba_spi_auxdev - KEBA SPI auxiliary device + * @auxdev: auxiliary device object + * @io: address range of SPI controller IO memory + * @info_size: number of SPI devices to be probed + * @info: SPI devices to be probed + */ +struct keba_spi_auxdev { + struct auxiliary_device auxdev; + struct resource io; + int info_size; + struct spi_board_info *info; +}; + +/** + * struct keba_fan_auxdev - KEBA fan auxiliary device + * @auxdev: auxiliary device object + * @io: address range of fan controller IO memory + */ +struct keba_fan_auxdev { + struct auxiliary_device auxdev; + struct resource io; +}; + +/** + * struct keba_batt_auxdev - KEBA battery auxiliary device + * @auxdev: auxiliary device object + * @io: address range of battery controller IO memory + */ +struct keba_batt_auxdev { + struct auxiliary_device auxdev; + struct resource io; +}; + +/** + * struct keba_uart_auxdev - KEBA UART auxiliary device + * @auxdev: auxiliary device object + * @io: address range of UART controller IO memory + * @irq: number of UART controller interrupt + */ +struct keba_uart_auxdev { + struct auxiliary_device auxdev; + struct resource io; + unsigned int irq; +}; + #endif /* _LINUX_MISC_KEBA_H */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index ba875a619b97..cc647992f3d1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -211,6 +211,7 @@ enum { enum { MLX5_PFAULT_SUBTYPE_WQE = 0, MLX5_PFAULT_SUBTYPE_RDMA = 1, + MLX5_PFAULT_SUBTYPE_MEMORY = 2, }; enum wqe_page_fault_type { @@ -370,6 +371,7 @@ enum mlx5_driver_event { MLX5_DRIVER_EVENT_SF_PEER_DEVLINK, MLX5_DRIVER_EVENT_AFFILIATION_DONE, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, + MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, }; enum { @@ -646,10 +648,11 @@ struct mlx5_eqe_page_req { __be32 rsvd1[5]; }; +#define MEMORY_SCHEME_PAGE_FAULT_GRANULARITY 4096 struct mlx5_eqe_page_fault { - __be32 bytes_committed; union { struct { + __be32 bytes_committed; u16 reserved1; __be16 wqe_index; u16 reserved2; @@ -659,6 +662,7 @@ struct mlx5_eqe_page_fault { __be32 pftype_wq; } __packed wqe; struct { + __be32 bytes_committed; __be32 r_key; u16 reserved1; __be16 packet_length; @@ -666,6 +670,23 @@ struct mlx5_eqe_page_fault { __be64 rdma_va; __be32 pftype_token; } __packed rdma; + struct { + u8 flags; + u8 reserved1; + __be16 post_demand_fault_pages; + __be16 pre_demand_fault_pages; + __be16 token47_32; + __be32 token31_0; + /* + * FW changed from specifying the fault size in byte + * count to 4k pages granularity. The size specified + * in pages uses bits 31:12, to keep backward + * compatibility. + */ + __be32 demand_fault_pages; + __be32 mkey; + __be64 va; + } __packed memory; } __packed; } __packed; @@ -1243,7 +1264,8 @@ enum mlx5_pcam_feature_groups { enum mlx5_mcam_reg_groups { MLX5_MCAM_REGS_FIRST_128 = 0x0, MLX5_MCAM_REGS_0x9100_0x917F = 0x2, - MLX5_MCAM_REGS_NUM = 0x3, + MLX5_MCAM_REGS_0x9180_0x91FF = 0x3, + MLX5_MCAM_REGS_NUM = 0x4, }; enum mlx5_mcam_feature_groups { @@ -1369,6 +1391,14 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) +#define MLX5_CAP_ODP_SCHEME(mdev, cap) \ + (MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + mem_page_fault) ? \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + memory_page_fault_scheme_cap.cap) : \ + MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, \ + transport_page_fault_scheme_cap.cap)) + #define MLX5_CAP_ODP_MAX(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->max, cap) @@ -1392,6 +1422,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9100_0x917F], \ mng_access_reg_cap_mask.access_regs2.reg) +#define MLX5_CAP_MCAM_REG3(mdev, reg) \ + MLX5_GET(mcam_reg, (mdev)->caps.mcam[MLX5_MCAM_REGS_0x9180_0x91FF], \ + mng_access_reg_cap_mask.access_regs3.reg) + #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) @@ -1444,6 +1478,7 @@ enum { MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4, MLX5_CMD_STAT_BAD_RES_ERR = 0x5, MLX5_CMD_STAT_RES_BUSY = 0x6, + MLX5_CMD_STAT_NOT_READY = 0x7, MLX5_CMD_STAT_LIM_ERR = 0x8, MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9, MLX5_CMD_STAT_IX_ERR = 0xa, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a96438ded15f..fc7e6153b73d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -45,7 +45,6 @@ #include <linux/workqueue.h> #include <linux/mempool.h> #include <linux/interrupt.h> -#include <linux/idr.h> #include <linux/notifier.h> #include <linux/refcount.h> #include <linux/auxiliary_bus.h> @@ -159,6 +158,8 @@ enum { MLX5_REG_MSECQ = 0x9155, MLX5_REG_MSEES = 0x9156, MLX5_REG_MIRC = 0x9162, + MLX5_REG_MTPTM = 0x9180, + MLX5_REG_MTCTR = 0x9181, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, MLX5_REG_DTOR = 0xC00E, @@ -472,36 +473,6 @@ struct mlx5_core_sriov { u16 max_ec_vfs; }; -struct mlx5_fc_pool { - struct mlx5_core_dev *dev; - struct mutex pool_lock; /* protects pool lists */ - struct list_head fully_used; - struct list_head partially_used; - struct list_head unused; - int available_fcs; - int used_fcs; - int threshold; -}; - -struct mlx5_fc_stats { - spinlock_t counters_idr_lock; /* protects counters_idr */ - struct idr counters_idr; - struct list_head counters; - struct llist_head addlist; - struct llist_head dellist; - - struct workqueue_struct *wq; - struct delayed_work work; - unsigned long next_query; - unsigned long sampling_interval; /* jiffies */ - u32 *bulk_query_out; - int bulk_query_len; - size_t num_counters; - bool bulk_query_alloc_failed; - unsigned long next_bulk_query_alloc; - struct mlx5_fc_pool fc_pool; -}; - struct mlx5_events; struct mlx5_mpfs; struct mlx5_eswitch; @@ -628,7 +599,7 @@ struct mlx5_priv { struct mlx5_devcom_comp_dev *hca_devcom_comp; struct mlx5_fw_reset *fw_reset; struct mlx5_core_roce roce; - struct mlx5_fc_stats fc_stats; + struct mlx5_fc_stats *fc_stats; struct mlx5_rl_table rl_table; struct mlx5_ft_pool *ft_pool; @@ -643,6 +614,7 @@ struct mlx5_priv { struct mlx5_sf_hw_table *sf_hw_table; struct mlx5_sf_table *sf_table; #endif + struct blocking_notifier_head lag_nh; }; enum mlx5_device_state { @@ -1181,7 +1153,6 @@ bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); -struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 3fb428ce7d1c..438db888bde0 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -298,9 +298,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); -/* As mlx5_fc_create() but doesn't queue stats refresh thread. */ -struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging); - void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, @@ -342,4 +339,7 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, struct mlx5_pkt_reformat *reformat); u32 mlx5_flow_table_id(struct mlx5_flow_table *ft); + +struct mlx5_flow_root_namespace * +mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type); #endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index cab228cf51c6..4fbbcf35498b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -80,23 +80,15 @@ enum { enum { MLX5_OBJ_TYPE_SW_ICM = 0x0008, - MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, -}; - -enum { - MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), - MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), - MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), - MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = - (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), - MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), -}; - -enum { MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b, MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS = 0x001c, MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018, + MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT = 0x23, + MLX5_OBJ_TYPE_STC = 0x0040, + MLX5_OBJ_TYPE_RTC = 0x0041, + MLX5_OBJ_TYPE_STE = 0x0042, + MLX5_OBJ_TYPE_MODIFY_HDR_PATTERN = 0x0043, MLX5_OBJ_TYPE_PAGE_TRACK = 0x46, MLX5_OBJ_TYPE_MKEY = 0xff01, MLX5_OBJ_TYPE_QP = 0xff02, @@ -112,6 +104,16 @@ enum { MLX5_OBJ_TYPE_RQT = 0xff0e, MLX5_OBJ_TYPE_FLOW_COUNTER = 0xff0f, MLX5_OBJ_TYPE_CQ = 0xff10, + MLX5_OBJ_TYPE_FT_ALIAS = 0xff15, +}; + +enum { + MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM = (1ULL << MLX5_OBJ_TYPE_SW_ICM), + MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT = (1ULL << 11), + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q = (1ULL << 13), + MLX5_GENERAL_OBJ_TYPES_CAP_HEADER_MODIFY_ARGUMENT = + (1ULL << MLX5_OBJ_TYPE_HEADER_MODIFY_ARGUMENT), + MLX5_GENERAL_OBJ_TYPES_CAP_MACSEC_OFFLOAD = (1ULL << 39), }; enum { @@ -313,6 +315,8 @@ enum { MLX5_CMD_OP_MODIFY_VHCA_STATE = 0xb0e, MLX5_CMD_OP_SYNC_CRYPTO = 0xb12, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS = 0xb16, + MLX5_CMD_OP_GENERATE_WQE = 0xb17, + MLX5_CMD_OPCODE_QUERY_VUID = 0xb22, MLX5_CMD_OP_MAX }; @@ -485,7 +489,13 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_66[0x2]; u8 reformat_add_macsec[0x1]; u8 reformat_remove_macsec[0x1]; - u8 reserved_at_6a[0xe]; + u8 reparse[0x1]; + u8 reserved_at_6b[0x1]; + u8 cross_vhca_object[0x1]; + u8 reformat_l2_to_l3_audp_tunnel[0x1]; + u8 reformat_l3_audp_tunnel_to_l2[0x1]; + u8 ignore_flow_level_rtc_valid[0x1]; + u8 reserved_at_70[0x8]; u8 log_max_ft_num[0x8]; u8 reserved_at_80[0x10]; @@ -522,7 +532,15 @@ struct mlx5_ifc_ipv6_layout_bits { u8 ipv6[16][0x8]; }; +struct mlx5_ifc_ipv6_simple_layout_bits { + u8 ipv6_127_96[0x20]; + u8 ipv6_95_64[0x20]; + u8 ipv6_63_32[0x20]; + u8 ipv6_31_0[0x20]; +}; + union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { + struct mlx5_ifc_ipv6_simple_layout_bits ipv6_simple_layout; struct mlx5_ifc_ipv6_layout_bits ipv6_layout; struct mlx5_ifc_ipv4_layout_bits ipv4_layout; u8 reserved_at_0[0x80]; @@ -911,7 +929,9 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_8[0x5]; u8 fdb_uplink_hairpin[0x1]; u8 fdb_multi_path_any_table_limit_regc[0x1]; - u8 reserved_at_f[0x3]; + u8 reserved_at_f[0x1]; + u8 fdb_dynamic_tunnel[0x1]; + u8 reserved_at_11[0x1]; u8 fdb_multi_path_any_table[0x1]; u8 reserved_at_13[0x2]; u8 fdb_modify_header_fwd_to_table[0x1]; @@ -950,6 +970,73 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_at_1900[0x6700]; }; +struct mlx5_ifc_wqe_based_flow_table_cap_bits { + u8 reserved_at_0[0x3]; + u8 log_max_num_ste[0x5]; + u8 reserved_at_8[0x3]; + u8 log_max_num_stc[0x5]; + u8 reserved_at_10[0x3]; + u8 log_max_num_rtc[0x5]; + u8 reserved_at_18[0x3]; + u8 log_max_num_header_modify_pattern[0x5]; + + u8 rtc_hash_split_table[0x1]; + u8 rtc_linear_lookup_table[0x1]; + u8 reserved_at_22[0x1]; + u8 stc_alloc_log_granularity[0x5]; + u8 reserved_at_28[0x3]; + u8 stc_alloc_log_max[0x5]; + u8 reserved_at_30[0x3]; + u8 ste_alloc_log_granularity[0x5]; + u8 reserved_at_38[0x3]; + u8 ste_alloc_log_max[0x5]; + + u8 reserved_at_40[0xb]; + u8 rtc_reparse_mode[0x5]; + u8 reserved_at_50[0x3]; + u8 rtc_index_mode[0x5]; + u8 reserved_at_58[0x3]; + u8 rtc_log_depth_max[0x5]; + + u8 reserved_at_60[0x10]; + u8 ste_format[0x10]; + + u8 stc_action_type[0x80]; + + u8 header_insert_type[0x10]; + u8 header_remove_type[0x10]; + + u8 trivial_match_definer[0x20]; + + u8 reserved_at_140[0x1b]; + u8 rtc_max_num_hash_definer_gen_wqe[0x5]; + + u8 reserved_at_160[0x18]; + u8 access_index_mode[0x8]; + + u8 reserved_at_180[0x10]; + u8 ste_format_gen_wqe[0x10]; + + u8 linear_match_definer_reg_c3[0x20]; + + u8 fdb_jump_to_tir_stc[0x1]; + u8 reserved_at_1c1[0x1f]; +}; + +struct mlx5_ifc_esw_cap_bits { + u8 reserved_at_0[0x1d]; + u8 merged_eswitch[0x1]; + u8 reserved_at_1e[0x2]; + + u8 reserved_at_20[0x40]; + + u8 esw_manager_vport_number_valid[0x1]; + u8 reserved_at_61[0xf]; + u8 esw_manager_vport_number[0x10]; + + u8 reserved_at_80[0x780]; +}; + enum { MLX5_COUNTER_SOURCE_ESWITCH = 0x0, MLX5_COUNTER_FLOW_ESWITCH = 0x1, @@ -1027,7 +1114,8 @@ struct mlx5_ifc_qos_cap_bits { u8 max_tsar_bw_share[0x20]; - u8 reserved_at_100[0x20]; + u8 nic_element_type[0x10]; + u8 nic_tsar_type[0x10]; u8 reserved_at_120[0x3]; u8 log_meter_aso_granularity[0x5]; @@ -1325,11 +1413,13 @@ struct mlx5_ifc_atomic_caps_bits { u8 reserved_at_e0[0x720]; }; -struct mlx5_ifc_odp_cap_bits { +struct mlx5_ifc_odp_scheme_cap_bits { u8 reserved_at_0[0x40]; u8 sig[0x1]; - u8 reserved_at_41[0x1f]; + u8 reserved_at_41[0x4]; + u8 page_prefetch[0x1]; + u8 reserved_at_46[0x1a]; u8 reserved_at_60[0x20]; @@ -1343,7 +1433,20 @@ struct mlx5_ifc_odp_cap_bits { struct mlx5_ifc_odp_per_transport_service_cap_bits dc_odp_caps; - u8 reserved_at_120[0x6E0]; + u8 reserved_at_120[0xe0]; +}; + +struct mlx5_ifc_odp_cap_bits { + struct mlx5_ifc_odp_scheme_cap_bits transport_page_fault_scheme_cap; + + struct mlx5_ifc_odp_scheme_cap_bits memory_page_fault_scheme_cap; + + u8 reserved_at_400[0x200]; + + u8 mem_page_fault[0x1]; + u8 reserved_at_601[0x1f]; + + u8 reserved_at_620[0x1e0]; }; struct mlx5_ifc_tls_cap_bits { @@ -1443,9 +1546,13 @@ enum { }; enum { + MLX5_FLEX_IPV4_OVER_VXLAN_ENABLED = 1 << 0, + MLX5_FLEX_IPV6_OVER_VXLAN_ENABLED = 1 << 1, + MLX5_FLEX_IPV6_OVER_IP_ENABLED = 1 << 2, MLX5_FLEX_PARSER_GENEVE_ENABLED = 1 << 3, MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED = 1 << 4, MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED = 1 << 5, + MLX5_FLEX_P_BIT_VXLAN_GPE_ENABLED = 1 << 6, MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED = 1 << 7, MLX5_FLEX_PARSER_ICMP_V4_ENABLED = 1 << 8, MLX5_FLEX_PARSER_ICMP_V6_ENABLED = 1 << 9, @@ -1650,7 +1757,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pci_sync_for_fw_update_event[0x1]; u8 reserved_at_1f2[0x6]; u8 init2_lag_tx_port_affinity[0x1]; - u8 reserved_at_1fa[0x3]; + u8 reserved_at_1fa[0x2]; + u8 wqe_based_flow_table_update_cap[0x1]; u8 cqe_version[0x4]; u8 compact_address_vector[0x1]; @@ -1764,7 +1872,12 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x6]; + u8 dp_ordering_ooo_all_ud[0x1]; + u8 dp_ordering_ooo_all_uc[0x1]; + u8 dp_ordering_ooo_all_xrc[0x1]; + u8 dp_ordering_ooo_all_dc[0x1]; + u8 dp_ordering_ooo_all_rc[0x1]; + u8 pcie_reset_using_hotreset_method[0x1]; u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; u8 vport_counter_local_loopback[0x1]; @@ -1885,7 +1998,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_5a0[0x10]; u8 enhanced_cqe_compression[0x1]; - u8 reserved_at_5b1[0x2]; + u8 reserved_at_5b1[0x1]; + u8 crossing_vhca_mkey[0x1]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -1954,12 +2068,14 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 dynamic_msix_table_size[0xc]; u8 reserved_at_740[0xc]; u8 min_dynamic_vf_msix_table_size[0x4]; - u8 reserved_at_750[0x4]; + u8 reserved_at_750[0x2]; + u8 data_direct[0x1]; + u8 reserved_at_753[0x1]; u8 max_dynamic_vf_msix_table_size[0xc]; u8 reserved_at_760[0x3]; u8 log_max_num_header_modify_argument[0x5]; - u8 reserved_at_768[0x4]; + u8 log_header_modify_argument_granularity_offset[0x4]; u8 log_header_modify_argument_granularity[0x4]; u8 reserved_at_770[0x3]; u8 log_header_modify_argument_max_alloc[0x5]; @@ -1982,7 +2098,13 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_0[0x80]; u8 migratable[0x1]; - u8 reserved_at_81[0x1f]; + u8 reserved_at_81[0x7]; + u8 dp_ordering_force[0x1]; + u8 reserved_at_89[0x9]; + u8 query_vuid[0x1]; + u8 reserved_at_93[0x5]; + u8 umr_log_entity_size_5[0x1]; + u8 reserved_at_99[0x7]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -2006,7 +2128,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_140[0x60]; u8 flow_table_type_2_type[0x8]; - u8 reserved_at_1a8[0x3]; + u8 reserved_at_1a8[0x2]; + u8 format_select_dw_8_6_ext[0x1]; u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; @@ -2021,12 +2144,23 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 ts_cqe_metadata_size2wqe_counter[0x5]; u8 reserved_at_250[0x10]; - u8 reserved_at_260[0x120]; + u8 reserved_at_260[0x20]; + + u8 format_select_dw_gtpu_dw_0[0x8]; + u8 format_select_dw_gtpu_dw_1[0x8]; + u8 format_select_dw_gtpu_dw_2[0x8]; + u8 format_select_dw_gtpu_first_ext_dw_0[0x8]; + + u8 generate_wqe_type[0x20]; + + u8 reserved_at_2c0[0xc0]; + u8 reserved_at_380[0xb]; u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; - u8 reserved_at_3a0[0x10]; + u8 reserved_at_3a0[0xa]; + u8 max_mkey_log_entity_size_mtt[0x6]; u8 max_rqt_vhca_id[0x10]; u8 reserved_at_3c0[0x20]; @@ -2037,9 +2171,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_400[0x1]; u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; - u8 reserved_at_402[0x1e]; + u8 reserved_at_402[0xe]; + u8 return_reg_id[0x10]; - u8 reserved_at_420[0x20]; + u8 reserved_at_420[0x1c]; + u8 flow_table_hash_type[0x4]; u8 reserved_at_440[0x8]; u8 max_num_eqs_24b[0x18]; @@ -2086,7 +2222,7 @@ struct mlx5_ifc_extended_dest_format_bits { u8 reserved_at_60[0x20]; }; -union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits { +union mlx5_ifc_dest_format_flow_counter_list_auto_bits { struct mlx5_ifc_extended_dest_format_bits extended_dest_format; struct mlx5_ifc_flow_counter_list_bits flow_counter_list; }; @@ -2178,7 +2314,10 @@ struct mlx5_ifc_wq_bits { u8 reserved_at_139[0x4]; u8 log_wqe_stride_size[0x3]; - u8 reserved_at_140[0x80]; + u8 dbr_umem_id[0x20]; + u8 wq_umem_id[0x20]; + + u8 wq_umem_offset[0x40]; u8 headers_mkey[0x20]; @@ -3391,7 +3530,8 @@ struct mlx5_ifc_qpc_bits { u8 latency_sensitive[0x1]; u8 reserved_at_24[0x1]; u8 drain_sigerr[0x1]; - u8 reserved_at_26[0x2]; + u8 reserved_at_26[0x1]; + u8 dp_ordering_force[0x1]; u8 pd[0x18]; u8 mtu[0x3]; @@ -3464,7 +3604,8 @@ struct mlx5_ifc_qpc_bits { u8 rae[0x1]; u8 reserved_at_493[0x1]; u8 page_offset[0x6]; - u8 reserved_at_49a[0x3]; + u8 reserved_at_49a[0x2]; + u8 dp_ordering_1[0x1]; u8 cd_slave_receive[0x1]; u8 cd_slave_send[0x1]; u8 cd_master[0x1]; @@ -3562,6 +3703,8 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; + struct mlx5_ifc_wqe_based_flow_table_cap_bits wqe_based_flow_table_cap; + struct mlx5_ifc_esw_cap_bits esw_cap; struct mlx5_ifc_e_switch_cap_bits e_switch_cap; struct mlx5_ifc_port_selection_cap_bits port_selection_cap; struct mlx5_ifc_qos_cap_bits qos_cap; @@ -3678,7 +3821,7 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_1300[0x500]; - union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[]; + union mlx5_ifc_dest_format_flow_counter_list_auto_bits destination[]; }; enum { @@ -3919,7 +4062,8 @@ struct mlx5_ifc_sqc_bits { u8 reg_umr[0x1]; u8 allow_swp[0x1]; u8 hairpin[0x1]; - u8 reserved_at_f[0xb]; + u8 non_wire[0x1]; + u8 reserved_at_10[0xa]; u8 ts_format[0x2]; u8 reserved_at_1c[0x4]; @@ -3966,13 +4110,50 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, + ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, +}; + +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +enum { + TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, + TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, + TSAR_TYPE_CAP_MASK_ETS = 1 << 2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +union mlx5_ifc_element_attributes_bits { + struct mlx5_ifc_tsar_element_bits tsar; + struct mlx5_ifc_vport_element_bits vport; + struct mlx5_ifc_vport_tc_element_bits vport_tc; + u8 reserved_at_0[0x20]; }; struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; - u8 element_attributes[0x20]; + union mlx5_ifc_element_attributes_bits element_attributes; u8 parent_element_id[0x20]; @@ -4154,6 +4335,7 @@ enum { MLX5_MKC_ACCESS_MODE_KSM = 0x3, MLX5_MKC_ACCESS_MODE_SW_ICM = 0x4, MLX5_MKC_ACCESS_MODE_MEMIC = 0x5, + MLX5_MKC_ACCESS_MODE_CROSSING = 0x6, }; struct mlx5_ifc_mkc_bits { @@ -4196,14 +4378,16 @@ struct mlx5_ifc_mkc_bits { u8 bsf_octword_size[0x20]; - u8 reserved_at_120[0x80]; + u8 reserved_at_120[0x60]; + + u8 crossing_target_vhca_id[0x10]; + u8 reserved_at_190[0x10]; u8 translations_octword_size[0x20]; u8 reserved_at_1c0[0x19]; u8 relaxed_ordering_read[0x1]; - u8 reserved_at_1d9[0x1]; - u8 log_page_size[0x5]; + u8 log_page_size[0x6]; u8 reserved_at_1e0[0x20]; }; @@ -4367,7 +4551,8 @@ struct mlx5_ifc_dctc_bits { u8 state[0x4]; u8 reserved_at_8[0x18]; - u8 reserved_at_20[0x8]; + u8 reserved_at_20[0x7]; + u8 dp_ordering_force[0x1]; u8 user_index[0x18]; u8 reserved_at_40[0x8]; @@ -4382,7 +4567,9 @@ struct mlx5_ifc_dctc_bits { u8 latency_sensitive[0x1]; u8 rlky[0x1]; u8 free_ar[0x1]; - u8 reserved_at_73[0xd]; + u8 reserved_at_73[0x1]; + u8 dp_ordering_1[0x1]; + u8 reserved_at_75[0xb]; u8 reserved_at_80[0x8]; u8 cs_res[0x8]; @@ -4658,29 +4845,6 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; -struct mlx5_ifc_vport_tc_element_bits { - u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; - u8 vport_number[0x10]; -}; - -struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; - u8 vport_number[0x10]; -}; - -enum { - TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, - TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, - TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, -}; - -struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; - u8 tsar_type[0x8]; - u8 reserved_at_10[0x10]; -}; - enum { MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0, MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1, @@ -4961,6 +5125,16 @@ struct mlx5_ifc_set_fte_in_bits { struct mlx5_ifc_flow_context_bits flow_context; }; +struct mlx5_ifc_dest_format_bits { + u8 destination_type[0x8]; + u8 destination_id[0x18]; + + u8 destination_eswitch_owner_vhca_id_valid[0x1]; + u8 packet_reformat[0x1]; + u8 reserved_at_22[0xe]; + u8 destination_eswitch_owner_vhca_id[0x10]; +}; + struct mlx5_ifc_rts2rts_qp_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -5124,6 +5298,36 @@ struct mlx5_ifc_query_vport_state_out_bits { u8 state[0x4]; }; +struct mlx5_ifc_array1024_auto_bits { + u8 array1024_auto[32][0x20]; +}; + +struct mlx5_ifc_query_vuid_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x40]; + + u8 query_vfs_vuid[0x1]; + u8 data_direct[0x1]; + u8 reserved_at_62[0xe]; + u8 vhca_id[0x10]; +}; + +struct mlx5_ifc_query_vuid_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x1a0]; + + u8 reserved_at_1e0[0x10]; + u8 num_of_entries[0x10]; + + struct mlx5_ifc_array1024_auto_bits vuid[]; +}; + enum { MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT = 0x0, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT = 0x1, @@ -6127,7 +6331,8 @@ struct mlx5_ifc_flow_table_context_bits { u8 termination_table[0x1]; u8 table_miss_action[0x4]; u8 level[0x8]; - u8 reserved_at_10[0x8]; + u8 rtc_valid[0x1]; + u8 reserved_at_11[0x7]; u8 log_size[0x8]; u8 reserved_at_20[0x8]; @@ -6137,11 +6342,21 @@ struct mlx5_ifc_flow_table_context_bits { u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; + union { + struct { + u8 sw_owner_icm_root_1[0x40]; - u8 sw_owner_icm_root_1[0x40]; + u8 sw_owner_icm_root_0[0x40]; + } sws; + struct { + u8 rtc_id_0[0x20]; + + u8 rtc_id_1[0x20]; - u8 sw_owner_icm_root_0[0x40]; + u8 reserved_at_100[0x40]; + } hws; + }; }; struct mlx5_ifc_query_flow_table_out_bits { @@ -7217,6 +7432,30 @@ struct mlx5_ifc_qp_2err_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_trans_page_fault_info_bits { + u8 error[0x1]; + u8 reserved_at_1[0x4]; + u8 page_fault_type[0x3]; + u8 wq_number[0x18]; + + u8 reserved_at_20[0x8]; + u8 fault_token[0x18]; +}; + +struct mlx5_ifc_mem_page_fault_info_bits { + u8 error[0x1]; + u8 reserved_at_1[0xf]; + u8 fault_token_47_32[0x10]; + + u8 fault_token_31_0[0x20]; +}; + +union mlx5_ifc_page_fault_resume_in_page_fault_info_auto_bits { + struct mlx5_ifc_trans_page_fault_info_bits trans_page_fault_info; + struct mlx5_ifc_mem_page_fault_info_bits mem_page_fault_info; + u8 reserved_at_0[0x40]; +}; + struct mlx5_ifc_page_fault_resume_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7233,13 +7472,8 @@ struct mlx5_ifc_page_fault_resume_in_bits { u8 reserved_at_20[0x10]; u8 op_mod[0x10]; - u8 error[0x1]; - u8 reserved_at_41[0x4]; - u8 page_fault_type[0x3]; - u8 wq_number[0x18]; - - u8 reserved_at_60[0x8]; - u8 token[0x18]; + union mlx5_ifc_page_fault_resume_in_page_fault_info_auto_bits + page_fault_info; }; struct mlx5_ifc_nop_out_bits { @@ -8923,7 +9157,9 @@ struct mlx5_ifc_create_qp_in_bits { struct mlx5_ifc_qpc_bits qpc; - u8 reserved_at_800[0x60]; + u8 wq_umem_offset[0x40]; + + u8 wq_umem_id[0x20]; u8 wq_umem_valid[0x1]; u8 reserved_at_861[0x1f]; @@ -8989,7 +9225,8 @@ struct mlx5_ifc_create_mkey_in_bits { u8 pg_access[0x1]; u8 mkey_umem_valid[0x1]; - u8 reserved_at_62[0x1e]; + u8 data_direct[0x1]; + u8 reserved_at_63[0x1d]; struct mlx5_ifc_mkc_bits memory_key_mkey_entry; @@ -10401,6 +10638,18 @@ struct mlx5_ifc_mcam_access_reg_bits2 { u8 regs_31_to_0[0x20]; }; +struct mlx5_ifc_mcam_access_reg_bits3 { + u8 regs_127_to_96[0x20]; + + u8 regs_95_to_64[0x20]; + + u8 regs_63_to_32[0x20]; + + u8 regs_31_to_2[0x1e]; + u8 mtctr[0x1]; + u8 mtptm[0x1]; +}; + struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; @@ -10413,6 +10662,7 @@ struct mlx5_ifc_mcam_reg_bits { struct mlx5_ifc_mcam_access_reg_bits access_regs; struct mlx5_ifc_mcam_access_reg_bits1 access_regs1; struct mlx5_ifc_mcam_access_reg_bits2 access_regs2; + struct mlx5_ifc_mcam_access_reg_bits3 access_regs3; u8 reserved_at_0[0x80]; } mng_access_reg_cap_mask; @@ -11035,6 +11285,11 @@ struct mlx5_ifc_mcda_reg_bits { }; enum { + MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE = 0, + MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET = 1, +}; + +enum { MLX5_MFRL_REG_RESET_STATE_IDLE = 0, MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1, MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2, @@ -11061,7 +11316,8 @@ struct mlx5_ifc_mfrl_reg_bits { u8 pci_sync_for_fw_update_start[0x1]; u8 pci_sync_for_fw_update_resp[0x2]; u8 rst_type_sel[0x3]; - u8 reserved_at_28[0x4]; + u8 pci_reset_req_method[0x3]; + u8 reserved_at_2b[0x1]; u8 reset_state[0x4]; u8 reset_type[0x8]; u8 reset_level[0x8]; @@ -11166,6 +11422,34 @@ struct mlx5_ifc_mtmp_reg_bits { u8 sensor_name_lo[0x20]; }; +struct mlx5_ifc_mtptm_reg_bits { + u8 reserved_at_0[0x10]; + u8 psta[0x1]; + u8 reserved_at_11[0xf]; + + u8 reserved_at_20[0x60]; +}; + +enum { + MLX5_MTCTR_REQUEST_NOP = 0x0, + MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK = 0x1, + MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER = 0x2, + MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK = 0x3, +}; + +struct mlx5_ifc_mtctr_reg_bits { + u8 first_clock_timestamp_request[0x8]; + u8 second_clock_timestamp_request[0x8]; + u8 reserved_at_10[0x10]; + + u8 first_clock_valid[0x1]; + u8 second_clock_valid[0x1]; + u8 reserved_at_22[0x1e]; + + u8 first_clock_timestamp[0x40]; + u8 second_clock_timestamp[0x40]; +}; + union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_bufferx_reg_bits bufferx_reg; struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout; @@ -11230,6 +11514,8 @@ union mlx5_ifc_ports_control_registers_document_bits { struct mlx5_ifc_mrtc_reg_bits mrtc_reg; struct mlx5_ifc_mtcap_reg_bits mtcap_reg; struct mlx5_ifc_mtmp_reg_bits mtmp_reg; + struct mlx5_ifc_mtptm_reg_bits mtptm_reg; + struct mlx5_ifc_mtctr_reg_bits mtctr_reg; u8 reserved_at_0[0x60e0]; }; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index ad1ce650146c..fc7eeff99a8a 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -149,6 +149,7 @@ enum { MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2, MLX5_WQE_CTRL_CQ_UPDATE_AND_EQE = 3 << 2, MLX5_WQE_CTRL_SOLICITED = 1 << 1, + MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE = 1 << 5, }; enum { diff --git a/include/linux/mm.h b/include/linux/mm.h index 6549d0979b28..c39c4945946c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -97,6 +97,14 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif +#ifndef DIRECT_MAP_PHYSMEM_END +# ifdef MAX_PHYSMEM_BITS +# define DIRECT_MAP_PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) +# else +# define DIRECT_MAP_PHYSMEM_END (((phys_addr_t)-1)&~(1ULL<<63)) +# endif +#endif + #include <asm/page.h> #include <asm/processor.h> @@ -321,21 +329,27 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) +#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ #ifdef CONFIG_ARCH_HAS_PKEYS -# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 -# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 /* A protection key is a 4-bit value */ -# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 /* on x86 and 5-bit value on ppc64 */ -# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 -# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 -#ifdef CONFIG_PPC +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +#if CONFIG_ARCH_PKEY_BITS > 3 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +#else +# define VM_PKEY_BIT3 0 +#endif +#if CONFIG_ARCH_PKEY_BITS > 4 # define VM_PKEY_BIT4 VM_HIGH_ARCH_4 #else # define VM_PKEY_BIT4 0 @@ -353,13 +367,23 @@ extern unsigned int kobjsize(const void *objp); * for more details on the guard size. */ # define VM_SHADOW_STACK VM_HIGH_ARCH_5 -#else +#endif + +#if defined(CONFIG_ARM64_GCS) +/* + * arm64's Guarded Control Stack implements similar functionality and + * has similar constraints to shadow stacks. + */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_6 +#endif + +#ifndef VM_SHADOW_STACK # define VM_SHADOW_STACK VM_NONE #endif #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 @@ -374,8 +398,8 @@ extern unsigned int kobjsize(const void *objp); #endif #if defined(CONFIG_ARM64_MTE) -# define VM_MTE VM_HIGH_ARCH_0 /* Use Tagged memory for access control */ -# define VM_MTE_ALLOWED VM_HIGH_ARCH_1 /* Tagged memory permitted */ +# define VM_MTE VM_HIGH_ARCH_4 /* Use Tagged memory for access control */ +# define VM_MTE_ALLOWED VM_HIGH_ARCH_5 /* Tagged memory permitted */ #else # define VM_MTE VM_NONE # define VM_MTE_ALLOWED VM_NONE @@ -409,6 +433,8 @@ extern unsigned int kobjsize(const void *objp); #ifdef CONFIG_64BIT #define VM_DROPPABLE_BIT 40 #define VM_DROPPABLE BIT(VM_DROPPABLE_BIT) +#elif defined(CONFIG_PPC32) +#define VM_DROPPABLE VM_ARCH_1 #else #define VM_DROPPABLE VM_NONE #endif @@ -1005,27 +1031,6 @@ static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) return mas_prev(&vmi->mas, 0); } -static inline -struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi) -{ - return mas_prev_range(&vmi->mas, 0); -} - -static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) -{ - return vmi->mas.index; -} - -static inline unsigned long vma_iter_end(struct vma_iterator *vmi) -{ - return vmi->mas.last + 1; -} -static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, - unsigned long count) -{ - return mas_expected_entries(&vmi->mas, count); -} - static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, unsigned long start, unsigned long end, gfp_t gfp) { @@ -1249,8 +1254,7 @@ static inline int folio_mapcount(const struct folio *folio) if (likely(!folio_test_large(folio))) { mapcount = atomic_read(&folio->_mapcount) + 1; - /* Handle page_has_type() pages */ - if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + if (page_mapcount_is_type(mapcount)) mapcount = 0; return mapcount; } @@ -1294,8 +1298,6 @@ static inline struct folio *virt_to_folio(const void *x) void __folio_put(struct folio *folio); -void put_pages_list(struct list_head *pages); - void split_page(struct page *page, unsigned int order); void folio_copy(struct folio *dst, struct folio *src); int folio_mc_copy(struct folio *dst, struct folio *src); @@ -1597,6 +1599,7 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); +void unpin_user_folio(struct folio *folio, unsigned long npages); void unpin_folios(struct folio **folios, unsigned long nfolios); static inline bool is_cow_mapping(vm_flags_t flags) @@ -1745,6 +1748,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) __set_bit(pid_bit, &vma->numab_state->pids_active[1]); } } + +bool folio_use_access_time(struct folio *folio); #else /* !CONFIG_NUMA_BALANCING */ static inline int folio_xchg_last_cpupid(struct folio *folio, int cpupid) { @@ -1798,6 +1803,10 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) { } +static inline bool folio_use_access_time(struct folio *folio) +{ + return false; +} #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) @@ -1896,7 +1905,7 @@ static inline unsigned long page_to_section(const struct page *page) * * Return: The Page Frame Number of the first page in the folio. */ -static inline unsigned long folio_pfn(struct folio *folio) +static inline unsigned long folio_pfn(const struct folio *folio) { return page_to_pfn(&folio->page); } @@ -2147,14 +2156,19 @@ static inline size_t folio_size(const struct folio *folio) * MM ("mapped shared"), or if the folio is only mapped into a single MM * ("mapped exclusively"). * + * For KSM folios, this function also returns "mapped shared" when a folio is + * mapped multiple times into the same MM, because the individual page mappings + * are independent. + * * As precise information is not easily available for all folios, this function * estimates the number of MMs ("sharers") that are currently mapping a folio * using the number of times the first page of the folio is currently mapped * into page tables. * - * For small anonymous folios (except KSM folios) and anonymous hugetlb folios, - * the return value will be exactly correct, because they can only be mapped - * at most once into an MM, and they cannot be partially mapped. + * For small anonymous folios and anonymous hugetlb folios, the return + * value will be exactly correct: non-KSM folios can only be mapped at most once + * into an MM, and they cannot be partially mapped. KSM folios are + * considered shared even if mapped multiple times into the same MM. * * For other folios, the result can be fuzzy: * #. For partially-mappable large folios (THP), the return value can wrongly @@ -2163,9 +2177,6 @@ static inline size_t folio_size(const struct folio *folio) * #. For pagecache folios (including hugetlb), the return value can wrongly * indicate "mapped shared" (false positive) when two VMAs in the same MM * cover the same file range. - * #. For (small) KSM folios, the return value can wrongly indicate "mapped - * shared" (false positive), when the folio is mapped multiple times into - * the same MM. * * Further, this function only considers current page table mappings that * are tracked using the folio mapcount(s). @@ -2199,26 +2210,10 @@ static inline bool folio_likely_mapped_shared(struct folio *folio) return atomic_read(&folio->_mapcount) > 0; } -#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -static inline int arch_make_page_accessible(struct page *page) -{ - return 0; -} -#endif - #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE static inline int arch_make_folio_accessible(struct folio *folio) { - int ret; - long i, nr = folio_nr_pages(folio); - - for (i = 0; i < nr; i++) { - ret = arch_make_page_accessible(folio_page(folio, i)); - if (ret) - break; - } - - return ret; + return 0; } #endif @@ -2398,11 +2393,40 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte(struct vm_area_struct *vma, unsigned long address, - pte_t **ptepp, spinlock_t **ptlp); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); +struct follow_pfnmap_args { + /** + * Inputs: + * @vma: Pointer to @vm_area_struct struct + * @address: the virtual address to walk + */ + struct vm_area_struct *vma; + unsigned long address; + /** + * Internals: + * + * The caller shouldn't touch any of these. + */ + spinlock_t *lock; + pte_t *ptep; + /** + * Outputs: + * + * @pfn: the PFN of the address + * @pgprot: the pgprot_t of the mapping + * @writable: whether the mapping is writable + * @special: whether the mapping is a special mapping (real PFN maps) + */ + unsigned long pfn; + pgprot_t pgprot; + bool writable; + bool special; +}; +int follow_pfnmap_start(struct follow_pfnmap_args *args); +void follow_pfnmap_end(struct follow_pfnmap_args *args); + extern void truncate_pagecache(struct inode *inode, loff_t new); extern void truncate_setsize(struct inode *inode, loff_t newsize); void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to); @@ -2510,6 +2534,7 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, struct folio **folios, unsigned int max_folios, pgoff_t *offset); +int folio_add_pins(struct folio *folio, unsigned int pins); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); @@ -2525,16 +2550,12 @@ struct kvec; struct page *get_dump_page(unsigned long addr); bool folio_mark_dirty(struct folio *folio); +bool folio_mark_dirty_lock(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -extern unsigned long move_page_tables(struct vm_area_struct *vma, - unsigned long old_addr, struct vm_area_struct *new_vma, - unsigned long new_addr, unsigned long len, - bool need_rmap_locks, bool for_stack); - /* * Flags used by change_protection(). For now we make it a bitmap so * that we can pass in multiple flags just like parameters. However @@ -2555,21 +2576,6 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) -bool vma_needs_dirty_tracking(struct vm_area_struct *vma); -bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); -static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) -{ - /* - * We want to check manually if we can change individual PTEs writable - * if we can't do that automatically for all PTEs in a mapping. For - * private mappings, that's always the case when we have write - * permissions as we properly have to handle COW. - */ - if (vma->vm_flags & VM_SHARED) - return vma_wants_writenotify(vma, vma->vm_page_prot); - return !!(vma->vm_flags & VM_WRITE); - -} bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); extern long change_protection(struct mmu_gather *tlb, @@ -2693,6 +2699,30 @@ static inline pte_t pte_mkspecial(pte_t pte) } #endif +#ifndef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP +static inline bool pmd_special(pmd_t pmd) +{ + return false; +} + +static inline pmd_t pmd_mkspecial(pmd_t pmd) +{ + return pmd; +} +#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */ + +#ifndef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP +static inline bool pud_special(pud_t pud) +{ + return false; +} + +static inline pud_t pud_mkspecial(pud_t pud) +{ + return pud; +} +#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ + #ifndef CONFIG_ARCH_HAS_PTE_DEVMAP static inline int pte_devmap(pte_t pte) { @@ -2885,7 +2915,7 @@ static inline void pagetable_free(struct ptdesc *pt) __free_pages(page, compound_order(page)); } -#if USE_SPLIT_PTE_PTLOCKS +#if defined(CONFIG_SPLIT_PTE_PTLOCKS) #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); bool ptlock_alloc(struct ptdesc *ptdesc); @@ -2943,7 +2973,7 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) return true; } -#else /* !USE_SPLIT_PTE_PTLOCKS */ +#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ @@ -2958,7 +2988,7 @@ static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} -#endif /* USE_SPLIT_PTE_PTLOCKS */ +#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) { @@ -2997,8 +3027,11 @@ static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, return pte; } -pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp); +pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, spinlock_t **ptlp); +pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, pmd_t *pmdvalp, + spinlock_t **ptlp); #define pte_unmap_unlock(pte, ptl) do { \ spin_unlock(ptl); \ @@ -3018,7 +3051,7 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd, ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \ NULL: pte_offset_kernel(pmd, address)) -#if USE_SPLIT_PMD_PTLOCKS +#if defined(CONFIG_SPLIT_PMD_PTLOCKS) static inline struct page *pmd_pgtable_page(pmd_t *pmd) { @@ -3277,78 +3310,9 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff, - struct vm_area_struct *next); -extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, pgoff_t pgoff); -extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); -extern void unlink_file_vma(struct vm_area_struct *); -extern struct vm_area_struct *copy_vma(struct vm_area_struct **, - unsigned long addr, unsigned long len, pgoff_t pgoff, - bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); -struct vm_area_struct *vma_modify(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long vm_flags, - struct mempolicy *policy, - struct vm_userfaultfd_ctx uffd_ctx, - struct anon_vma_name *anon_name); - -/* We are about to modify the VMA's flags. */ -static inline struct vm_area_struct -*vma_modify_flags(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long new_flags) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), vma->vm_userfaultfd_ctx, - anon_vma_name(vma)); -} - -/* We are about to modify the VMA's flags and/or anon_name. */ -static inline struct vm_area_struct -*vma_modify_flags_name(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, - unsigned long end, - unsigned long new_flags, - struct anon_vma_name *new_name) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), vma->vm_userfaultfd_ctx, new_name); -} - -/* We are about to modify the VMA's memory policy. */ -static inline struct vm_area_struct -*vma_modify_policy(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct mempolicy *new_pol) -{ - return vma_modify(vmi, prev, vma, start, end, vma->vm_flags, - new_pol, vma->vm_userfaultfd_ctx, anon_vma_name(vma)); -} - -/* We are about to modify the VMA's flags and/or uffd context. */ -static inline struct vm_area_struct -*vma_modify_flags_uffd(struct vma_iterator *vmi, - struct vm_area_struct *prev, - struct vm_area_struct *vma, - unsigned long start, unsigned long end, - unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx) -{ - return vma_modify(vmi, prev, vma, start, end, new_flags, - vma_policy(vma), new_ctx, anon_vma_name(vma)); -} +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); static inline int check_data_rlimit(unsigned long rlim, unsigned long new, @@ -3381,10 +3345,6 @@ extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, const struct vm_special_mapping *spec); -/* This is an obsolete alternative to _install_special_mapping. */ -extern int install_special_mapping(struct mm_struct *mm, - unsigned long addr, unsigned long len, - unsigned long flags, struct page **pages); unsigned long randomize_stack_top(unsigned long stack_top); unsigned long randomize_page(unsigned long start, unsigned long range); @@ -3410,14 +3370,14 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr, extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool unlock); +int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, + struct mm_struct *mm, unsigned long start, + unsigned long end, struct list_head *uf, bool unlock); extern int do_munmap(struct mm_struct *, unsigned long, size_t, struct list_head *uf); extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU -extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct list_head *uf, bool unlock); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) @@ -3645,9 +3605,6 @@ static inline vm_fault_t vmf_fs_error(int err) return VM_FAULT_SIGBUS; } -struct page *follow_page(struct vm_area_struct *vma, unsigned long address, - unsigned int foll_flags); - static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) @@ -3876,8 +3833,6 @@ void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); -void pmd_init(void *addr); -void pud_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); @@ -4183,18 +4138,18 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, #ifdef CONFIG_UNACCEPTED_MEMORY -bool range_contains_unaccepted_memory(phys_addr_t start, phys_addr_t end); -void accept_memory(phys_addr_t start, phys_addr_t end); +bool range_contains_unaccepted_memory(phys_addr_t start, unsigned long size); +void accept_memory(phys_addr_t start, unsigned long size); #else static inline bool range_contains_unaccepted_memory(phys_addr_t start, - phys_addr_t end) + unsigned long size) { return false; } -static inline void accept_memory(phys_addr_t start, phys_addr_t end) +static inline void accept_memory(phys_addr_t start, unsigned long size) { } @@ -4202,9 +4157,7 @@ static inline void accept_memory(phys_addr_t start, phys_addr_t end) static inline bool pfn_is_unaccepted_memory(unsigned long pfn) { - phys_addr_t paddr = pfn << PAGE_SHIFT; - - return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); + return range_contains_unaccepted_memory(pfn << PAGE_SHIFT, PAGE_SIZE); } void vma_pgtable_walk_begin(struct vm_area_struct *vma); @@ -4212,4 +4165,18 @@ void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +#ifdef CONFIG_64BIT +int do_mseal(unsigned long start, size_t len_in, unsigned long flags); +#else +static inline int do_mseal(unsigned long start, size_t len_in, unsigned long flags) +{ + /* noop on 32 bit */ + return 0; +} +#endif + +int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status); +int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); +int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f4fe593c1400..1b6a917fffa4 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -155,6 +155,11 @@ static inline int folio_lru_refs(struct folio *folio) return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; } +static inline void folio_clear_lru_refs(struct folio *folio) +{ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); +} + static inline int folio_lru_gen(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); @@ -222,6 +227,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, { unsigned long seq; unsigned long flags; + unsigned long mask; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -257,7 +263,14 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); + mask = LRU_GEN_MASK; + /* + * Don't clear PG_workingset here because it can affect PSI accounting + * if the activation is due to workingset refault. + */ + if (folio_test_active(folio)) + mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active); + set_mask_bits(&folio->flags, mask, flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ @@ -291,6 +304,12 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return true; } +static inline void folio_migrate_refs(struct folio *new, struct folio *old) +{ + unsigned long refs = READ_ONCE(old->flags) & LRU_REFS_MASK; + + set_mask_bits(&new->flags, LRU_REFS_MASK, refs); +} #else /* !CONFIG_LRU_GEN */ static inline bool lru_gen_enabled(void) @@ -313,6 +332,10 @@ static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, return false; } +static inline void folio_migrate_refs(struct folio *new, struct folio *old) +{ + +} #endif /* CONFIG_LRU_GEN */ static __always_inline @@ -521,7 +544,7 @@ static inline pte_marker copy_pte_marker( { pte_marker srcm = pte_marker_get(entry); /* Always copy error entries. */ - pte_marker dstm = srcm & PTE_MARKER_POISONED; + pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD); /* Only copy PTE markers if UFFD register matches. */ if ((srcm & PTE_MARKER_UFFD_WP) && userfaultfd_wp(dst_vma)) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 485424979254..7361a8f3ab68 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -109,7 +109,7 @@ struct page { /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. - * Used for swp_entry_t if PageSwapCache. + * Used for swp_entry_t if swapcache flag set. * Indicates order in the buddy system if PageBuddy. */ unsigned long private; @@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); */ #define STRUCT_PAGE_MAX_SHIFT (order_base_2(sizeof(struct page))) -#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) -#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) - /* * page_private can be used on tail pages. However, PagePrivate is only * checked by the VM on the head page. So page_private on the tail pages @@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio) return folio->private; } -struct page_frag_cache { - void * va; -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) - __u16 offset; - __u16 size; -#else - __u32 offset; -#endif - /* we maintain a pagecount bias, so that we dont dirty cache line - * containing page->_refcount every time we allocate a fragment. - */ - unsigned int pagecnt_bias; - bool pfmemalloc; -}; - typedef unsigned long vm_flags_t; /* @@ -660,6 +642,9 @@ struct vma_numab_state { * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). + * + * Only explicitly marked struct members may be accessed by RCU readers before + * getting a stable reference. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -675,7 +660,11 @@ struct vm_area_struct { #endif }; - struct mm_struct *vm_mm; /* The address space we belong to. */ + /* + * The address space we belong to. + * Unstable RCU readers are allowed to read this. + */ + struct mm_struct *vm_mm; pgprot_t vm_page_prot; /* Access permissions of this VMA. */ /* @@ -688,7 +677,10 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK - /* Flag to indicate areas detached from the mm->mm_mt tree */ + /* + * Flag to indicate areas detached from the mm->mm_mt tree. + * Unstable RCU readers are allowed to read this. + */ bool detached; /* @@ -706,6 +698,7 @@ struct vm_area_struct { * slowpath. */ int vm_lock_seq; + /* Unstable RCU readers are allowed to read this. */ struct vma_lock *vm_lock; #endif @@ -771,6 +764,7 @@ struct vm_area_struct { struct mm_cid { u64 time; int cid; + int recent_cid; }; #endif @@ -841,6 +835,27 @@ struct mm_struct { * When the next mm_cid scan is due (in jiffies). */ unsigned long mm_cid_next_scan; + /** + * @nr_cpus_allowed: Number of CPUs allowed for mm. + * + * Number of CPUs allowed in the union of all mm's + * threads allowed CPUs. + */ + unsigned int nr_cpus_allowed; + /** + * @max_nr_cid: Maximum number of concurrency IDs allocated. + * + * Track the highest number of concurrency IDs allocated for the + * mm. + */ + atomic_t max_nr_cid; + /** + * @cpus_allowed_lock: Lock protecting mm cpus_allowed. + * + * Provide mutual exclusion for mm cpus_allowed and + * mm nr_cpus_allowed updates. + */ + raw_spinlock_t cpus_allowed_lock; #endif #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* size of all page tables */ @@ -947,7 +962,7 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; #endif -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_NUMA_BALANCING @@ -1159,18 +1174,30 @@ static inline int mm_cid_clear_lazy_put(int cid) return cid & ~MM_CID_LAZY_PUT; } +/* + * mm_cpus_allowed: Union of all mm's threads allowed CPUs. + */ +static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm) +{ + unsigned long bitmap = (unsigned long)mm; + + bitmap += offsetof(struct mm_struct, cpu_bitmap); + /* Skip cpu_bitmap */ + bitmap += cpumask_size(); + return (struct cpumask *)bitmap; +} + /* Accessor for struct mm_struct's cidmask. */ static inline cpumask_t *mm_cidmask(struct mm_struct *mm) { - unsigned long cid_bitmap = (unsigned long)mm; + unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm); - cid_bitmap += offsetof(struct mm_struct, cpu_bitmap); - /* Skip cpu_bitmap */ + /* Skip mm_cpus_allowed */ cid_bitmap += cpumask_size(); return (struct cpumask *)cid_bitmap; } -static inline void mm_init_cid(struct mm_struct *mm) +static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { int i; @@ -1178,17 +1205,22 @@ static inline void mm_init_cid(struct mm_struct *mm) struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i); pcpu_cid->cid = MM_CID_UNSET; + pcpu_cid->recent_cid = MM_CID_UNSET; pcpu_cid->time = 0; } + mm->nr_cpus_allowed = p->nr_cpus_allowed; + atomic_set(&mm->max_nr_cid, 0); + raw_spin_lock_init(&mm->cpus_allowed_lock); + cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask); cpumask_clear(mm_cidmask(mm)); } -static inline int mm_alloc_cid_noprof(struct mm_struct *mm) +static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p) { mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; - mm_init_cid(mm); + mm_init_cid(mm, p); return 0; } #define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) @@ -1201,16 +1233,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm) static inline unsigned int mm_cid_size(void) { - return cpumask_size(); + return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */ +} + +static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) +{ + struct cpumask *mm_allowed = mm_cpus_allowed(mm); + + if (!mm) + return; + /* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */ + raw_spin_lock(&mm->cpus_allowed_lock); + cpumask_or(mm_allowed, mm_allowed, cpumask); + WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed)); + raw_spin_unlock(&mm->cpus_allowed_lock); } #else /* CONFIG_SCHED_MM_CID */ -static inline void mm_init_cid(struct mm_struct *mm) { } -static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; } +static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { } +static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; } static inline void mm_destroy_cid(struct mm_struct *mm) { } + static inline unsigned int mm_cid_size(void) { return 0; } +static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { } #endif /* CONFIG_SCHED_MM_CID */ struct mmu_gather; @@ -1313,6 +1360,9 @@ struct vm_special_mapping { int (*mremap)(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma); + + void (*close)(const struct vm_special_mapping *sm, + struct vm_area_struct *vma); }; enum tlb_flush_reason { @@ -1485,4 +1535,88 @@ enum { /* See also internal only FOLL flags in mm/internal.h */ }; +/* mm flags */ + +/* + * The first two bits represent core dump modes for set-user-ID, + * the modes are SUID_DUMP_* defined in linux/sched/coredump.h + */ +#define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +/* coredump filter bits */ +#define MMF_DUMP_ANON_PRIVATE 2 +#define MMF_DUMP_ANON_SHARED 3 +#define MMF_DUMP_MAPPED_PRIVATE 4 +#define MMF_DUMP_MAPPED_SHARED 5 +#define MMF_DUMP_ELF_HEADERS 6 +#define MMF_DUMP_HUGETLB_PRIVATE 7 +#define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 + +#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS +#define MMF_DUMP_FILTER_BITS 9 +#define MMF_DUMP_FILTER_MASK \ + (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) +#define MMF_DUMP_FILTER_DEFAULT \ + ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ + (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) + +#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS +# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) +#else +# define MMF_DUMP_MASK_DEFAULT_ELF 0 +#endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ + +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ + +#define MMF_HAS_UPROBES 19 /* has uprobes */ +#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ +#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ +#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ +#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ +#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 26 /* mm is shared between processes */ +/* + * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either + * replaced in the future by mm.pinned_vm when it becomes stable, or grow into + * a counter on its own. We're aggresive on this bit for now: even if the + * pinned pages were unpinned later on, we'll still keep this bit set for the + * lifecycle of this mm, just for simplicity. + */ +#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ + +#define MMF_HAS_MDWE 28 +#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) + + +#define MMF_HAS_MDWE_NO_INHERIT 29 + +#define MMF_VM_MERGE_ANY 30 +#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) + +#define MMF_TOPDOWN 31 /* mm searches top down by default */ +#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ + MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) + +static inline unsigned long mmf_init_flags(unsigned long flags) +{ + if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) + flags &= ~((1UL << MMF_HAS_MDWE) | + (1UL << MMF_HAS_MDWE_NO_INHERIT)); + return flags & MMF_INIT_MASK; +} + #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index a2f6179b672b..a82aa80c0ba4 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -8,6 +8,7 @@ * (These are defined separately to decouple sched.h from mm_types.h as much as possible.) */ +#include <linux/align.h> #include <linux/types.h> #include <asm/page.h> @@ -16,9 +17,6 @@ #include <asm/tlbbatch.h> #endif -#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) -#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ - IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) /* @@ -46,6 +44,26 @@ struct page_frag { #endif }; +#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) +#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) +struct page_frag_cache { + /* encoded_page consists of the virtual address, pfmemalloc bit and + * order of a page. + */ + unsigned long encoded_page; + + /* we maintain a pagecount bias, so that we dont dirty cache line + * containing page->_refcount every time we allocate a fragment. + */ +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32) + __u16 offset; + __u16 pagecnt_bias; +#else + __u32 offset; + __u32 pagecnt_bias; +#endif +}; + /* Track pages that require TLB flushes */ struct tlbflush_unmap_batch { #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH diff --git a/include/linux/mman.h b/include/linux/mman.h index bcb201ab7a41..a842783ffa62 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -2,6 +2,7 @@ #ifndef _LINUX_MMAN_H #define _LINUX_MMAN_H +#include <linux/fs.h> #include <linux/mm.h> #include <linux/percpu_counter.h> @@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages) #endif #ifndef arch_calc_vm_flag_bits -#define arch_calc_vm_flag_bits(flags) 0 +#define arch_calc_vm_flag_bits(file, flags) 0 #endif #ifndef arch_validate_prot @@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey) * Combine the mmap "flags" argument into "vm_flags" used internally. */ static inline unsigned long -calc_vm_flag_bits(unsigned long flags) +calc_vm_flag_bits(struct file *file, unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | - arch_calc_vm_flag_bits(flags); + arch_calc_vm_flag_bits(file, flags); } unsigned long vm_commit_limit(void); @@ -188,16 +189,31 @@ static inline bool arch_memory_deny_write_exec_supported(void) * * d) mmap(PROT_READ | PROT_EXEC) * mmap(PROT_READ | PROT_EXEC | PROT_BTI) + * + * This is only applicable if the user has set the Memory-Deny-Write-Execute + * (MDWE) protection mask for the current process. + * + * @old specifies the VMA flags the VMA originally possessed, and @new the ones + * we propose to set. + * + * Return: false if proposed change is OK, true if not ok and should be denied. */ -static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags) +static inline bool map_deny_write_exec(unsigned long old, unsigned long new) { + /* If MDWE is disabled, we have nothing to deny. */ if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) return false; - if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE)) + /* If the new VMA is not executable, we have nothing to deny. */ + if (!(new & VM_EXEC)) + return false; + + /* Under MDWE we do not accept newly writably executable VMAs... */ + if (new & VM_WRITE) return true; - if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC)) + /* ...nor previously non-executable VMAs becoming executable. */ + if (!(old & VM_EXEC)) return true; return false; diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index f34407cc2788..526fce581657 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -35,7 +35,7 @@ struct mmc_csd { unsigned int wp_grp_size; unsigned int read_blkbits; unsigned int write_blkbits; - unsigned int capacity; + sector_t capacity; unsigned int read_partial:1, read_misalign:1, write_partial:1, @@ -188,6 +188,12 @@ struct sd_switch_caps { #define SD_MAX_CURRENT_400 (1 << SD_SET_CURRENT_LIMIT_400) #define SD_MAX_CURRENT_600 (1 << SD_SET_CURRENT_LIMIT_600) #define SD_MAX_CURRENT_800 (1 << SD_SET_CURRENT_LIMIT_800) + +#define SD4_SET_POWER_LIMIT_0_72W 0 +#define SD4_SET_POWER_LIMIT_1_44W 1 +#define SD4_SET_POWER_LIMIT_2_16W 2 +#define SD4_SET_POWER_LIMIT_2_88W 3 +#define SD4_SET_POWER_LIMIT_1_80W 4 }; struct sd_ext_reg { @@ -209,6 +215,34 @@ struct sd_ext_reg { #define SD_EXT_PERF_CMD_QUEUE (1<<4) }; +struct sd_uhs2_config { + u32 node_id; + + u32 n_fcu; + u32 maxblk_len; + u8 n_lanes; + u8 dadr_len; + u8 app_type; + u8 phy_minor_rev; + u8 phy_major_rev; + u8 can_hibernate; + u8 n_lss_sync; + u8 n_lss_dir; + u8 link_minor_rev; + u8 link_major_rev; + u8 dev_type; + u8 n_data_gap; + + u32 n_fcu_set; + u32 maxblk_len_set; + u8 n_lanes_set; + u8 speed_range_set; + u8 n_lss_sync_set; + u8 n_lss_dir_set; + u8 n_data_gap_set; + u8 max_retry_set; +}; + struct sdio_cccr { unsigned int sdio_vsn; unsigned int sd_vsn; @@ -294,6 +328,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ +#define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ @@ -319,6 +354,8 @@ struct mmc_card { struct sd_ext_reg ext_power; /* SD extension reg for PM */ struct sd_ext_reg ext_perf; /* SD extension reg for PERF */ + struct sd_uhs2_config uhs2_config; /* SD UHS-II config */ + unsigned int sdio_funcs; /* number of SDIO functions */ atomic_t sdio_funcs_probed; /* number of probed SDIO funcs */ struct sdio_cccr cccr; /* common card info */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 2c7928a50907..56972bd78462 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -11,16 +11,18 @@ struct mmc_data; struct mmc_request; -enum mmc_blk_status { - MMC_BLK_SUCCESS = 0, - MMC_BLK_PARTIAL, - MMC_BLK_CMD_ERR, - MMC_BLK_RETRY, - MMC_BLK_ABORT, - MMC_BLK_DATA_ERR, - MMC_BLK_ECC_ERR, - MMC_BLK_NOMEDIUM, - MMC_BLK_NEW_REQUEST, +#define UHS2_MAX_PAYLOAD_LEN 2 +#define UHS2_MAX_RESP_LEN 20 + +struct uhs2_command { + u16 header; + u16 arg; + __be32 payload[UHS2_MAX_PAYLOAD_LEN]; + u8 payload_len; + u8 packet_len; + u8 tmode_half_duplex; + u8 uhs2_resp[UHS2_MAX_RESP_LEN]; /* UHS2 native cmd resp */ + u8 uhs2_resp_len; /* UHS2 native cmd resp len */ }; struct mmc_command { @@ -108,6 +110,12 @@ struct mmc_command { unsigned int busy_timeout; /* busy detect timeout in ms */ struct mmc_data *data; /* data segment associated with cmd */ struct mmc_request *mrq; /* associated request */ + + struct uhs2_command *uhs2_cmd; /* UHS2 command */ + + /* for SDUC */ + bool has_ext_addr; + u8 ext_addr; }; struct mmc_data { @@ -166,6 +174,7 @@ struct mmc_request { const struct bio_crypt_ctx *crypto_ctx; int crypto_key_slot; #endif + struct uhs2_command uhs2_cmd; }; struct mmc_card; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 88c6a76042ee..f166d6611ddb 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -10,12 +10,14 @@ #include <linux/sched.h> #include <linux/device.h> #include <linux/fault-inject.h> +#include <linux/debugfs.h> #include <linux/mmc/core.h> #include <linux/mmc/card.h> #include <linux/mmc/pm.h> #include <linux/dma-direction.h> #include <linux/blk-crypto-profile.h> +#include <linux/mmc/sd_uhs2.h> struct mmc_ios { unsigned int clock; /* clock rate */ @@ -63,6 +65,10 @@ struct mmc_ios { #define MMC_TIMING_MMC_HS400 10 #define MMC_TIMING_SD_EXP 11 #define MMC_TIMING_SD_EXP_1_2V 12 +#define MMC_TIMING_UHS2_SPEED_A 13 +#define MMC_TIMING_UHS2_SPEED_A_HD 14 +#define MMC_TIMING_UHS2_SPEED_B 15 +#define MMC_TIMING_UHS2_SPEED_B_HD 16 unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */ @@ -70,6 +76,9 @@ struct mmc_ios { #define MMC_SIGNAL_VOLTAGE_180 1 #define MMC_SIGNAL_VOLTAGE_120 2 + unsigned char vqmmc2_voltage; +#define MMC_VQMMC2_VOLTAGE_180 0 + unsigned char drv_type; /* driver type (A, B, C, D) */ #define MMC_SET_DRIVER_TYPE_B 0 @@ -91,6 +100,43 @@ struct mmc_clk_phase_map { struct mmc_clk_phase phase[MMC_NUM_CLK_PHASES]; }; +struct sd_uhs2_caps { + u32 dap; + u32 gap; + u32 group_desc; + u32 maxblk_len; + u32 n_fcu; + u8 n_lanes; + u8 addr64; + u8 card_type; + u8 phy_rev; + u8 speed_range; + u8 n_lss_sync; + u8 n_lss_dir; + u8 link_rev; + u8 host_type; + u8 n_data_gap; + + u32 maxblk_len_set; + u32 n_fcu_set; + u8 n_lanes_set; + u8 n_lss_sync_set; + u8 n_lss_dir_set; + u8 n_data_gap_set; + u8 max_retry_set; +}; + +enum sd_uhs2_operation { + UHS2_PHY_INIT = 0, + UHS2_SET_CONFIG, + UHS2_ENABLE_INT, + UHS2_DISABLE_INT, + UHS2_ENABLE_CLK, + UHS2_DISABLE_CLK, + UHS2_CHECK_DORMANT, + UHS2_SET_IOS, +}; + struct mmc_host; enum mmc_err_stat { @@ -218,6 +264,14 @@ struct mmc_host_ops { /* Initialize an SD express card, mandatory for MMC_CAP2_SD_EXP. */ int (*init_sd_express)(struct mmc_host *host, struct mmc_ios *ios); + + /* + * The uhs2_control callback is used to execute SD UHS-II specific + * operations. It's mandatory to implement for hosts that supports the + * SD UHS-II interface (MMC_CAP2_SD_UHS2). Expected return values are a + * negative errno in case of a failure or zero for success. + */ + int (*uhs2_control)(struct mmc_host *host, enum sd_uhs2_operation op); }; struct mmc_cqe_ops { @@ -264,16 +318,6 @@ struct mmc_cqe_ops { void (*cqe_recovery_finish)(struct mmc_host *host); }; -struct mmc_async_req { - /* active mmc request */ - struct mmc_request *mrq; - /* - * Check error status of completed mmc request. - * Returns 0 if success otherwise non zero. - */ - enum mmc_blk_status (*err_check)(struct mmc_card *, struct mmc_async_req *); -}; - /** * struct mmc_slot - MMC slot functions * @@ -291,26 +335,13 @@ struct mmc_slot { void *handler_priv; }; -/** - * mmc_context_info - synchronization details for mmc context - * @is_done_rcv wake up reason was done request - * @is_new_req wake up reason was new request - * @is_waiting_last_req mmc context waiting for single running request - * @wait wait queue - */ -struct mmc_context_info { - bool is_done_rcv; - bool is_new_req; - bool is_waiting_last_req; - wait_queue_head_t wait; -}; - struct regulator; struct mmc_pwrseq; struct mmc_supply { struct regulator *vmmc; /* Card power supply */ struct regulator *vqmmc; /* Optional Vccq supply */ + struct regulator *vqmmc2; /* Optional supply for phy */ }; struct mmc_ctx { @@ -402,6 +433,7 @@ struct mmc_host { MMC_CAP2_HS200_1_2V_SDR) #define MMC_CAP2_SD_EXP (1 << 7) /* SD express via PCIe */ #define MMC_CAP2_SD_EXP_1_2V (1 << 8) /* SD express 1.2V */ +#define MMC_CAP2_SD_UHS2 (1 << 9) /* SD UHS-II support */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ @@ -428,6 +460,10 @@ struct mmc_host { #endif #define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */ + bool uhs2_sd_tran; /* UHS-II flag for SD_TRAN state */ + bool uhs2_app_cmd; /* UHS-II flag for APP command */ + struct sd_uhs2_caps uhs2_caps; /* Host UHS-II capabilities */ + int fixed_drv_type; /* fixed driver type for non-removable media */ mmc_pm_flag_t pm_caps; /* supported pm features */ @@ -590,6 +626,7 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, unsigned short vdd_bit); int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios); +int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, struct mmc_ios *ios); #else static inline int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, @@ -603,6 +640,12 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, { return -EINVAL; } + +static inline int mmc_regulator_set_vqmmc2(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + return -EINVAL; +} #endif int mmc_regulator_get_supply(struct mmc_host *mmc); @@ -638,6 +681,14 @@ static inline int mmc_card_uhs(struct mmc_card *card) card->host->ios.timing <= MMC_TIMING_UHS_DDR50; } +static inline bool mmc_card_uhs2(struct mmc_host *host) +{ + return host->ios.timing == MMC_TIMING_UHS2_SPEED_A || + host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD; +} + void mmc_retune_timer_stop(struct mmc_host *host); static inline void mmc_retune_needed(struct mmc_host *host) @@ -672,7 +723,14 @@ static inline void mmc_debugfs_err_stats_inc(struct mmc_host *host, host->err_stats[stat] += 1; } -int mmc_sd_switch(struct mmc_card *card, int mode, int group, u8 value, u8 *resp); +static inline int mmc_card_uhs2_hd_mode(struct mmc_host *host) +{ + return host->ios.timing == MMC_TIMING_UHS2_SPEED_A_HD || + host->ios.timing == MMC_TIMING_UHS2_SPEED_B_HD; +} + +int mmc_sd_switch(struct mmc_card *card, bool mode, int group, + u8 value, u8 *resp); int mmc_send_status(struct mmc_card *card, u32 *status); int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode); diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h index 6727576a8755..af5fc70e09a2 100644 --- a/include/linux/mmc/sd.h +++ b/include/linux/mmc/sd.h @@ -15,6 +15,9 @@ #define SD_SEND_IF_COND 8 /* bcr [11:0] See below R7 */ #define SD_SWITCH_VOLTAGE 11 /* ac R1 */ +/* Class 2 */ +#define SD_ADDR_EXT 22 /* ac [5:0] R1 */ + /* class 10 */ #define SD_SWITCH 6 /* adtc [31:0] See below R1 */ @@ -36,6 +39,7 @@ /* OCR bit definitions */ #define SD_OCR_S18R (1 << 24) /* 1.8V switching request */ #define SD_ROCR_S18A SD_OCR_S18R /* 1.8V switching accepted by card */ +#define SD_OCR_2T (1 << 27) /* HO2T/CO2T - SDUC support */ #define SD_OCR_XPC (1 << 28) /* SDXC power control */ #define SD_OCR_CCS (1 << 30) /* Card Capacity Status */ diff --git a/include/linux/mmc/sd_uhs2.h b/include/linux/mmc/sd_uhs2.h new file mode 100644 index 000000000000..7abe9bd870c7 --- /dev/null +++ b/include/linux/mmc/sd_uhs2.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Header file for UHS-II packets, Host Controller registers and I/O + * accessors. + * + * Copyright (C) 2014 Intel Corp, All Rights Reserved. + */ +#ifndef LINUX_MMC_UHS2_H +#define LINUX_MMC_UHS2_H + +/* LINK Layer definition */ +/* + * UHS2 Header: + * Refer to UHS-II Addendum Version 1.02 Figure 5-2, the format of CCMD Header is described below: + * bit [3:0] : DID(Destination ID = Node ID of UHS2 card) + * bit [6:4] : TYP(Packet Type) + * 000b: CCMD(Control command packet) + * 001b: DCMD(Data command packet) + * 010b: RES(Response packet) + * 011b: DATA(Data payload packet) + * 111b: MSG(Message packet) + * Others: Reserved + * bit [7] : NP(Native Packet) + * bit [10:8] : TID(Transaction ID) + * bit [11] : Reserved + * bit [15:12]: SID(Source ID 0: Node ID of Host) + * + * Broadcast CCMD issued by Host is represented as DID=SID=0. + */ +/* + * UHS2 Argument: + * Refer to UHS-II Addendum Version 1.02 Figure 6-5, the format of CCMD Argument is described below: + * bit [3:0] : MSB of IOADR + * bit [5:4] : PLEN(Payload Length) + * 00b: 0 byte + * 01b: 4 bytes + * 10b: 8 bytes + * 11b: 16 bytes + * bit [6] : Reserved + * bit [7] : R/W(Read/Write) + * 0: Control read command + * 1: Control write command + * bit [15:8] : LSB of IOADR + * + * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD. + * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last. + */ +#define UHS2_NATIVE_PACKET_POS 7 +#define UHS2_NATIVE_PACKET (1 << UHS2_NATIVE_PACKET_POS) + +#define UHS2_PACKET_TYPE_POS 4 +#define UHS2_PACKET_TYPE_CCMD (0 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_DCMD (1 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_RES (2 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_DATA (3 << UHS2_PACKET_TYPE_POS) +#define UHS2_PACKET_TYPE_MSG (7 << UHS2_PACKET_TYPE_POS) + +#define UHS2_DEST_ID_MASK 0x0F +#define UHS2_DEST_ID 0x1 + +#define UHS2_SRC_ID_POS 12 +#define UHS2_SRC_ID_MASK 0xF000 + +#define UHS2_TRANS_ID_POS 8 +#define UHS2_TRANS_ID_MASK 0x0700 + +/* UHS2 MSG */ +#define UHS2_MSG_CTG_POS 5 +#define UHS2_MSG_CTG_LMSG 0x00 +#define UHS2_MSG_CTG_INT 0x60 +#define UHS2_MSG_CTG_AMSG 0x80 + +#define UHS2_MSG_CTG_FCREQ 0x00 +#define UHS2_MSG_CTG_FCRDY 0x01 +#define UHS2_MSG_CTG_STAT 0x02 + +#define UHS2_MSG_CODE_POS 8 +#define UHS2_MSG_CODE_FC_UNRECOVER_ERR 0x8 +#define UHS2_MSG_CODE_STAT_UNRECOVER_ERR 0x8 +#define UHS2_MSG_CODE_STAT_RECOVER_ERR 0x1 + +/* TRANS Layer definition */ + +/* Native packets*/ +#define UHS2_NATIVE_CMD_RW_POS 7 +#define UHS2_NATIVE_CMD_WRITE (1 << UHS2_NATIVE_CMD_RW_POS) +#define UHS2_NATIVE_CMD_READ (0 << UHS2_NATIVE_CMD_RW_POS) + +#define UHS2_NATIVE_CMD_PLEN_POS 4 +#define UHS2_NATIVE_CMD_PLEN_4B (1 << UHS2_NATIVE_CMD_PLEN_POS) +#define UHS2_NATIVE_CMD_PLEN_8B (2 << UHS2_NATIVE_CMD_PLEN_POS) +#define UHS2_NATIVE_CMD_PLEN_16B (3 << UHS2_NATIVE_CMD_PLEN_POS) + +#define UHS2_NATIVE_CCMD_GET_MIOADR_MASK 0xF00 +#define UHS2_NATIVE_CCMD_MIOADR_MASK 0x0F + +#define UHS2_NATIVE_CCMD_LIOADR_POS 8 +#define UHS2_NATIVE_CCMD_GET_LIOADR_MASK 0x0FF + +#define UHS2_CCMD_DEV_INIT_COMPLETE_FLAG BIT(11) +#define UHS2_DEV_INIT_PAYLOAD_LEN 1 +#define UHS2_DEV_INIT_RESP_LEN 6 +#define UHS2_DEV_ENUM_PAYLOAD_LEN 1 +#define UHS2_DEV_ENUM_RESP_LEN 8 +#define UHS2_CFG_WRITE_PAYLOAD_LEN 2 +#define UHS2_CFG_WRITE_PHY_SET_RESP_LEN 4 +#define UHS2_CFG_WRITE_GENERIC_SET_RESP_LEN 5 +#define UHS2_GO_DORMANT_PAYLOAD_LEN 1 + +/* + * UHS2 Argument: + * Refer to UHS-II Addendum Version 1.02 Figure 6-8, the format of DCMD Argument is described below: + * bit [3:0] : Reserved + * bit [6:3] : TMODE(Transfer Mode) + * bit 3: DAM(Data Access Mode) + * bit 4: TLUM(TLEN Unit Mode) + * bit 5: LM(Length Mode) + * bit 6: DM(Duplex Mode) + * bit [7] : R/W(Read/Write) + * 0: Control read command + * 1: Control write command + * bit [15:8] : Reserved + * + * I/O Address specifies the address of register in UHS-II I/O space accessed by CCMD. + * The unit of I/O Address is 4 Bytes. It is transmitted in MSB first, LSB last. + */ +#define UHS2_DCMD_DM_POS 6 +#define UHS2_DCMD_2L_HD_MODE (1 << UHS2_DCMD_DM_POS) +#define UHS2_DCMD_LM_POS 5 +#define UHS2_DCMD_LM_TLEN_EXIST (1 << UHS2_DCMD_LM_POS) +#define UHS2_DCMD_TLUM_POS 4 +#define UHS2_DCMD_TLUM_BYTE_MODE (1 << UHS2_DCMD_TLUM_POS) +#define UHS2_NATIVE_DCMD_DAM_POS 3 +#define UHS2_NATIVE_DCMD_DAM_IO (1 << UHS2_NATIVE_DCMD_DAM_POS) + +#define UHS2_RES_NACK_POS 7 +#define UHS2_RES_NACK_MASK (0x1 << UHS2_RES_NACK_POS) + +#define UHS2_RES_ECODE_POS 4 +#define UHS2_RES_ECODE_MASK 0x7 +#define UHS2_RES_ECODE_COND 1 +#define UHS2_RES_ECODE_ARG 2 +#define UHS2_RES_ECODE_GEN 3 + +/* IOADR of device registers */ +#define UHS2_IOADR_GENERIC_CAPS 0x00 +#define UHS2_IOADR_PHY_CAPS 0x02 +#define UHS2_IOADR_LINK_CAPS 0x04 +#define UHS2_IOADR_RSV_CAPS 0x06 +#define UHS2_IOADR_GENERIC_SETTINGS 0x08 +#define UHS2_IOADR_PHY_SETTINGS 0x0A +#define UHS2_IOADR_LINK_SETTINGS 0x0C +#define UHS2_IOADR_PRESET 0x40 + +/* SD application packets */ +#define UHS2_SD_CMD_INDEX_POS 8 + +#define UHS2_SD_CMD_APP_POS 14 +#define UHS2_SD_CMD_APP (1 << UHS2_SD_CMD_APP_POS) + +/* UHS-II Device Registers */ +#define UHS2_DEV_CONFIG_REG 0x000 + +/* General Caps and Settings registers */ +#define UHS2_DEV_CONFIG_GEN_CAPS (UHS2_DEV_CONFIG_REG + 0x000) +#define UHS2_DEV_CONFIG_N_LANES_POS 8 +#define UHS2_DEV_CONFIG_N_LANES_MASK 0x3F +#define UHS2_DEV_CONFIG_2L_HD_FD 0x1 +#define UHS2_DEV_CONFIG_2D1U_FD 0x2 +#define UHS2_DEV_CONFIG_1D2U_FD 0x4 +#define UHS2_DEV_CONFIG_2D2U_FD 0x8 +#define UHS2_DEV_CONFIG_DADR_POS 14 +#define UHS2_DEV_CONFIG_DADR_MASK 0x1 +#define UHS2_DEV_CONFIG_APP_POS 16 +#define UHS2_DEV_CONFIG_APP_MASK 0xFF +#define UHS2_DEV_CONFIG_APP_SD_MEM 0x1 + +#define UHS2_DEV_CONFIG_GEN_SET (UHS2_DEV_CONFIG_REG + 0x008) +#define UHS2_DEV_CONFIG_GEN_SET_N_LANES_POS 8 +#define UHS2_DEV_CONFIG_GEN_SET_2L_FD_HD 0x0 +#define UHS2_DEV_CONFIG_GEN_SET_2D1U_FD 0x2 +#define UHS2_DEV_CONFIG_GEN_SET_1D2U_FD 0x3 +#define UHS2_DEV_CONFIG_GEN_SET_2D2U_FD 0x4 +#define UHS2_DEV_CONFIG_GEN_SET_CFG_COMPLETE BIT(31) + +/* PHY Caps and Settings registers */ +#define UHS2_DEV_CONFIG_PHY_CAPS (UHS2_DEV_CONFIG_REG + 0x002) +#define UHS2_DEV_CONFIG_PHY_MINOR_MASK 0xF +#define UHS2_DEV_CONFIG_PHY_MAJOR_POS 4 +#define UHS2_DEV_CONFIG_PHY_MAJOR_MASK 0x3 +#define UHS2_DEV_CONFIG_CAN_HIBER_POS 15 +#define UHS2_DEV_CONFIG_CAN_HIBER_MASK 0x1 +#define UHS2_DEV_CONFIG_PHY_CAPS1 (UHS2_DEV_CONFIG_REG + 0x003) +#define UHS2_DEV_CONFIG_N_LSS_SYN_MASK 0xF +#define UHS2_DEV_CONFIG_N_LSS_DIR_POS 4 +#define UHS2_DEV_CONFIG_N_LSS_DIR_MASK 0xF + +#define UHS2_DEV_CONFIG_PHY_SET (UHS2_DEV_CONFIG_REG + 0x00A) +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_POS 6 +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_A 0x0 +#define UHS2_DEV_CONFIG_PHY_SET_SPEED_B 0x1 + +/* LINK-TRAN Caps and Settings registers */ +#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS (UHS2_DEV_CONFIG_REG + 0x004) +#define UHS2_DEV_CONFIG_LT_MINOR_MASK 0xF +#define UHS2_DEV_CONFIG_LT_MAJOR_POS 4 +#define UHS2_DEV_CONFIG_LT_MAJOR_MASK 0x3 +#define UHS2_DEV_CONFIG_N_FCU_POS 8 +#define UHS2_DEV_CONFIG_N_FCU_MASK 0xFF +#define UHS2_DEV_CONFIG_DEV_TYPE_POS 16 +#define UHS2_DEV_CONFIG_DEV_TYPE_MASK 0x7 +#define UHS2_DEV_CONFIG_MAX_BLK_LEN_POS 20 +#define UHS2_DEV_CONFIG_MAX_BLK_LEN_MASK 0xFFF +#define UHS2_DEV_CONFIG_LINK_TRAN_CAPS1 (UHS2_DEV_CONFIG_REG + 0x005) +#define UHS2_DEV_CONFIG_N_DATA_GAP_MASK 0xFF + +#define UHS2_DEV_CONFIG_LINK_TRAN_SET (UHS2_DEV_CONFIG_REG + 0x00C) +#define UHS2_DEV_CONFIG_LT_SET_MAX_BLK_LEN 0x200 +#define UHS2_DEV_CONFIG_LT_SET_MAX_RETRY_POS 16 + +/* Preset register */ +#define UHS2_DEV_CONFIG_PRESET (UHS2_DEV_CONFIG_REG + 0x040) + +#define UHS2_DEV_INT_REG 0x100 + +#define UHS2_DEV_STATUS_REG 0x180 + +#define UHS2_DEV_CMD_REG 0x200 +#define UHS2_DEV_CMD_FULL_RESET (UHS2_DEV_CMD_REG + 0x000) +#define UHS2_DEV_CMD_GO_DORMANT_STATE (UHS2_DEV_CMD_REG + 0x001) +#define UHS2_DEV_CMD_DORMANT_HIBER BIT(7) +#define UHS2_DEV_CMD_DEVICE_INIT (UHS2_DEV_CMD_REG + 0x002) +#define UHS2_DEV_INIT_COMPLETE_FLAG BIT(11) +#define UHS2_DEV_CMD_ENUMERATE (UHS2_DEV_CMD_REG + 0x003) +#define UHS2_DEV_CMD_TRANS_ABORT (UHS2_DEV_CMD_REG + 0x004) + +#define UHS2_RCLK_MAX 52000000 +#define UHS2_RCLK_MIN 26000000 + +#endif /* LINUX_MMC_UHS2_H */ diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 39a7714605a7..d7cb1e5ecbda 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -46,7 +46,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); } \ } while (0) #define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ - static bool __section(".data.once") __warned; \ + static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -66,7 +66,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); unlikely(__ret_warn); \ }) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) ({ \ - static bool __section(".data.once") __warned; \ + static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -77,7 +77,7 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); unlikely(__ret_warn_once); \ }) #define VM_WARN_ON_ONCE_MM(cond, mm) ({ \ - static bool __section(".data.once") __warned; \ + static bool __section(".data..once") __warned; \ int __ret_warn_once = !!(cond); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index d39ebb10caeb..e2dd57ca368b 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -606,6 +606,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline int mmu_notifier_clear_young(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + return 0; +} + static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1dc6248feb83..b36124145a16 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,6 +220,9 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, +#ifdef CONFIG_HUGETLB_PAGE + NR_HUGETLB, +#endif NR_VM_NODE_STAT_ITEMS }; @@ -403,6 +406,8 @@ enum { NR_LRU_GEN_CAPS }; +#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) + #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) @@ -458,9 +463,7 @@ struct lru_gen_folio { enum { MM_LEAF_TOTAL, /* total leaf entries */ - MM_LEAF_OLD, /* old leaf entries */ MM_LEAF_YOUNG, /* young leaf entries */ - MM_NONLEAF_TOTAL, /* total non-leaf entries */ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ NR_MM_STATS @@ -557,7 +560,7 @@ struct lru_gen_memcg { void lru_gen_init_pgdat(struct pglist_data *pgdat); void lru_gen_init_lruvec(struct lruvec *lruvec); -void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); +bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw); void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); @@ -576,8 +579,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } -static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) { + return false; } static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) @@ -666,11 +670,6 @@ enum zone_watermarks { #define NR_LOWORDER_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1)) #define NR_PCP_LISTS (NR_LOWORDER_PCP_LISTS + NR_PCP_THP) -#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost) -#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost) -#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost) -#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost) - /* * Flags used in pcp->flags field. * @@ -829,6 +828,7 @@ struct zone { unsigned long watermark_boost; unsigned long nr_reserved_highatomic; + unsigned long nr_free_highatomic; /* * We don't know if the memory that we're going to allocate will be @@ -1016,6 +1016,32 @@ enum zone_flags { ZONE_BELOW_HIGH, /* zone is below high watermark. */ }; +static inline unsigned long wmark_pages(const struct zone *z, + enum zone_watermarks w) +{ + return z->_watermark[w] + z->watermark_boost; +} + +static inline unsigned long min_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_MIN); +} + +static inline unsigned long low_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_LOW); +} + +static inline unsigned long high_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_HIGH); +} + +static inline unsigned long promo_wmark_pages(const struct zone *z) +{ + return wmark_pages(z, WMARK_PROMO); +} + static inline unsigned long zone_managed_pages(struct zone *zone) { return (unsigned long)atomic_long_read(&zone->managed_pages); @@ -1688,7 +1714,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, zone = zonelist_zone(z)) #define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ - for (zone = z->zone; \ + for (zone = zonelist_zone(z); \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) @@ -1724,7 +1750,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes) nid = first_node(*nodes); zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK]; z = first_zones_zonelist(zonelist, ZONE_NORMAL, nodes); - return (!z->zone) ? true : false; + return (!zonelist_zone(z)) ? true : false; } diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index cd4d5c8781f5..b1b219bc3422 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -9,6 +9,7 @@ struct mnt_idmap; struct user_namespace; extern struct mnt_idmap nop_mnt_idmap; +extern struct mnt_idmap invalid_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 8f882f5881e8..70b366b64816 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -3,6 +3,9 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ +#include <linux/cleanup.h> +#include <linux/err.h> + struct mnt_namespace; struct fs_struct; struct user_namespace; @@ -11,6 +14,7 @@ struct ns_common; extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct user_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); +DEFINE_FREE(put_mnt_ns, struct mnt_namespace *, if (!IS_ERR_OR_NULL(_T)) put_mnt_ns(_T)) extern struct ns_common *from_mnt_ns(struct mnt_namespace *); extern const struct file_operations proc_mounts_operations; diff --git a/include/linux/module.h b/include/linux/module.h index 88ecc5e9f523..94acbacdcdf1 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -247,7 +247,7 @@ extern void cleanup_module(void); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -extern typeof(name) __mod_##type##__##name##_device_table \ +extern typeof(name) __mod_device_table__##type##__##name \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) @@ -296,7 +296,7 @@ extern typeof(name) __mod_##type##__##name##_device_table \ * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) -#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, __stringify(ns)) +#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, ns) struct notifier_block; @@ -367,6 +367,8 @@ enum mod_mem_type { struct module_memory { void *base; + void *rw_copy; + bool is_rox; unsigned int size; #ifdef CONFIG_MODULES_TREE_LOOKUP @@ -767,6 +769,15 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); +void *__module_writable_address(struct module *mod, void *loc); + +static inline void *module_writable_address(struct module *mod, void *loc) +{ + if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod) + return loc; + return __module_writable_address(mod, loc); +} + #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) @@ -874,6 +885,11 @@ static inline bool module_is_coming(struct module *mod) { return false; } + +static inline void *module_writable_address(struct module *mod, void *loc) +{ + return loc; +} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index e395461d59e5..1f5507ba5a12 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,6 +108,10 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +int module_post_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod); + #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/include/linux/mpi.h b/include/linux/mpi.h index eb0d1c1db208..47be46f36435 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -40,79 +40,26 @@ struct gcry_mpi { typedef struct gcry_mpi *MPI; #define mpi_get_nlimbs(a) ((a)->nlimbs) -#define mpi_has_sign(a) ((a)->sign) /*-- mpiutil.c --*/ MPI mpi_alloc(unsigned nlimbs); -void mpi_clear(MPI a); void mpi_free(MPI a); int mpi_resize(MPI a, unsigned nlimbs); -static inline MPI mpi_new(unsigned int nbits) -{ - return mpi_alloc((nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB); -} - MPI mpi_copy(MPI a); -MPI mpi_alloc_like(MPI a); -void mpi_snatch(MPI w, MPI u); -MPI mpi_set(MPI w, MPI u); -MPI mpi_set_ui(MPI w, unsigned long u); -MPI mpi_alloc_set_ui(unsigned long u); -void mpi_swap_cond(MPI a, MPI b, unsigned long swap); - -/* Constants used to return constant MPIs. See mpi_init if you - * want to add more constants. - */ -#define MPI_NUMBER_OF_CONSTANTS 6 -enum gcry_mpi_constants { - MPI_C_ZERO, - MPI_C_ONE, - MPI_C_TWO, - MPI_C_THREE, - MPI_C_FOUR, - MPI_C_EIGHT -}; - -MPI mpi_const(enum gcry_mpi_constants no); /*-- mpicoder.c --*/ - -/* Different formats of external big integer representation. */ -enum gcry_mpi_format { - GCRYMPI_FMT_NONE = 0, - GCRYMPI_FMT_STD = 1, /* Twos complement stored without length. */ - GCRYMPI_FMT_PGP = 2, /* As used by OpenPGP (unsigned only). */ - GCRYMPI_FMT_SSH = 3, /* As used by SSH (like STD but with length). */ - GCRYMPI_FMT_HEX = 4, /* Hex format. */ - GCRYMPI_FMT_USG = 5, /* Like STD but unsigned. */ - GCRYMPI_FMT_OPAQUE = 8 /* Opaque format (some functions only). */ -}; - MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); -int mpi_fromstr(MPI val, const char *str); -MPI mpi_scanval(const char *string); MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes, int *sign); -int mpi_print(enum gcry_mpi_format format, unsigned char *buffer, - size_t buflen, size_t *nwritten, MPI a); /*-- mpi-mod.c --*/ -void mpi_mod(MPI rem, MPI dividend, MPI divisor); - -/* Context used with Barrett reduction. */ -struct barrett_ctx_s; -typedef struct barrett_ctx_s *mpi_barrett_t; - -mpi_barrett_t mpi_barrett_init(MPI m, int copy); -void mpi_barrett_free(mpi_barrett_t ctx); -void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx); -void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx); +int mpi_mod(MPI rem, MPI dividend, MPI divisor); /*-- mpi-pow.c --*/ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); @@ -120,7 +67,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod); /*-- mpi-cmp.c --*/ int mpi_cmp_ui(MPI u, ulong v); int mpi_cmp(MPI u, MPI v); -int mpi_cmpabs(MPI u, MPI v); /*-- mpi-sub-ui.c --*/ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); @@ -129,138 +75,22 @@ int mpi_sub_ui(MPI w, MPI u, unsigned long vval); void mpi_normalize(MPI a); unsigned mpi_get_nbits(MPI a); int mpi_test_bit(MPI a, unsigned int n); -void mpi_set_bit(MPI a, unsigned int n); -void mpi_set_highbit(MPI a, unsigned int n); -void mpi_clear_highbit(MPI a, unsigned int n); -void mpi_clear_bit(MPI a, unsigned int n); -void mpi_rshift_limbs(MPI a, unsigned int count); -void mpi_rshift(MPI x, MPI a, unsigned int n); -void mpi_lshift_limbs(MPI a, unsigned int count); -void mpi_lshift(MPI x, MPI a, unsigned int n); +int mpi_set_bit(MPI a, unsigned int n); +int mpi_rshift(MPI x, MPI a, unsigned int n); /*-- mpi-add.c --*/ -void mpi_add_ui(MPI w, MPI u, unsigned long v); -void mpi_add(MPI w, MPI u, MPI v); -void mpi_sub(MPI w, MPI u, MPI v); -void mpi_addm(MPI w, MPI u, MPI v, MPI m); -void mpi_subm(MPI w, MPI u, MPI v, MPI m); +int mpi_add(MPI w, MPI u, MPI v); +int mpi_sub(MPI w, MPI u, MPI v); +int mpi_addm(MPI w, MPI u, MPI v, MPI m); +int mpi_subm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-mul.c --*/ -void mpi_mul(MPI w, MPI u, MPI v); -void mpi_mulm(MPI w, MPI u, MPI v, MPI m); +int mpi_mul(MPI w, MPI u, MPI v); +int mpi_mulm(MPI w, MPI u, MPI v, MPI m); /*-- mpi-div.c --*/ -void mpi_tdiv_r(MPI rem, MPI num, MPI den); -void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); -void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor); - -/*-- mpi-inv.c --*/ -int mpi_invm(MPI x, MPI a, MPI n); - -/*-- ec.c --*/ - -/* Object to represent a point in projective coordinates */ -struct gcry_mpi_point { - MPI x; - MPI y; - MPI z; -}; - -typedef struct gcry_mpi_point *MPI_POINT; - -/* Models describing an elliptic curve */ -enum gcry_mpi_ec_models { - /* The Short Weierstrass equation is - * y^2 = x^3 + ax + b - */ - MPI_EC_WEIERSTRASS = 0, - /* The Montgomery equation is - * by^2 = x^3 + ax^2 + x - */ - MPI_EC_MONTGOMERY, - /* The Twisted Edwards equation is - * ax^2 + y^2 = 1 + bx^2y^2 - * Note that we use 'b' instead of the commonly used 'd'. - */ - MPI_EC_EDWARDS -}; - -/* Dialects used with elliptic curves */ -enum ecc_dialects { - ECC_DIALECT_STANDARD = 0, - ECC_DIALECT_ED25519, - ECC_DIALECT_SAFECURVE -}; - -/* This context is used with all our EC functions. */ -struct mpi_ec_ctx { - enum gcry_mpi_ec_models model; /* The model describing this curve. */ - enum ecc_dialects dialect; /* The ECC dialect used with the curve. */ - int flags; /* Public key flags (not always used). */ - unsigned int nbits; /* Number of bits. */ - - /* Domain parameters. Note that they may not all be set and if set - * the MPIs may be flagged as constant. - */ - MPI p; /* Prime specifying the field GF(p). */ - MPI a; /* First coefficient of the Weierstrass equation. */ - MPI b; /* Second coefficient of the Weierstrass equation. */ - MPI_POINT G; /* Base point (generator). */ - MPI n; /* Order of G. */ - unsigned int h; /* Cofactor. */ - - /* The actual key. May not be set. */ - MPI_POINT Q; /* Public key. */ - MPI d; /* Private key. */ - - const char *name; /* Name of the curve. */ - - /* This structure is private to mpi/ec.c! */ - struct { - struct { - unsigned int a_is_pminus3:1; - unsigned int two_inv_p:1; - } valid; /* Flags to help setting the helper vars below. */ - - int a_is_pminus3; /* True if A = P - 3. */ - - MPI two_inv_p; - - mpi_barrett_t p_barrett; - - /* Scratch variables. */ - MPI scratch[11]; - - /* Helper for fast reduction. */ - /* int nist_nbits; /\* If this is a NIST curve, the # of bits. *\/ */ - /* MPI s[10]; */ - /* MPI c; */ - } t; - - /* Curve specific computation routines for the field. */ - void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec); - void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx); - void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx); - void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx); -}; - -void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, - enum ecc_dialects dialect, - int flags, MPI p, MPI a, MPI b); -void mpi_ec_deinit(struct mpi_ec_ctx *ctx); -MPI_POINT mpi_point_new(unsigned int nbits); -void mpi_point_release(MPI_POINT p); -void mpi_point_init(MPI_POINT p); -void mpi_point_free_parts(MPI_POINT p); -int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx); -void mpi_ec_add_points(MPI_POINT result, - MPI_POINT p1, MPI_POINT p2, - struct mpi_ec_ctx *ctx); -void mpi_ec_mul_point(MPI_POINT result, - MPI scalar, MPI_POINT point, - struct mpi_ec_ctx *ctx); -int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx); +int mpi_tdiv_r(MPI rem, MPI num, MPI den); +int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor); /* inline functions */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 944979763825..b10093c4d00e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -554,6 +554,8 @@ enum { MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), + /* PCI MSIs cannot be steered separately to CPU cores */ + MSI_FLAG_NO_AFFINITY = (1 << 21), }; /** diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index b4fa92a6e44b..1b56796f6cb3 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -15,7 +15,7 @@ #include <linux/kernel.h> #include <linux/io.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <asm/barrier.h> #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 diff --git a/include/linux/mtd/nand-ecc-mxic.h b/include/linux/mtd/nand-ecc-mxic.h index b125926e458c..0da4b2999576 100644 --- a/include/linux/mtd/nand-ecc-mxic.h +++ b/include/linux/mtd/nand-ecc-mxic.h @@ -16,7 +16,7 @@ struct mxic_ecc_engine; #if IS_ENABLED(CONFIG_MTD_NAND_ECC_MXIC) && IS_REACHABLE(CONFIG_MTD_NAND_CORE) -struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); +const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void); struct nand_ecc_engine *mxic_ecc_get_pipelined_engine(struct platform_device *spi_pdev); void mxic_ecc_put_pipelined_engine(struct nand_ecc_engine *eng); int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, @@ -24,7 +24,7 @@ int mxic_ecc_process_data_pipelined(struct nand_ecc_engine *eng, #else /* !CONFIG_MTD_NAND_ECC_MXIC */ -static inline struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) +static inline const struct nand_ecc_engine_ops *mxic_ecc_get_pipelined_ops(void) { return NULL; } diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index b2996dc987ff..0e2f228e8b4a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -103,6 +103,8 @@ enum nand_page_io_req_type { * @ooblen: the number of OOB bytes to read from/write to this page * @oobbuf: buffer to store OOB data in or get OOB data from * @mode: one of the %MTD_OPS_XXX mode + * @continuous: no need to start over the operation at the end of each page, the + * NAND device will automatically prepare the next one * * This object is used to pass per-page I/O requests to NAND sub-layers. This * way all useful information are already formatted in a useful way and @@ -125,6 +127,7 @@ struct nand_page_io_req { void *in; } oobbuf; int mode; + bool continuous; }; const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void); @@ -290,7 +293,7 @@ enum nand_ecc_engine_integration { struct nand_ecc_engine { struct device *dev; struct list_head node; - struct nand_ecc_engine_ops *ops; + const struct nand_ecc_engine_ops *ops; enum nand_ecc_engine_integration integration; void *priv; }; @@ -906,19 +909,19 @@ static inline void nanddev_pos_next_page(struct nand_device *nand, } /** - * nand_io_iter_init - Initialize a NAND I/O iterator + * nand_io_page_iter_init - Initialize a NAND I/O iterator * @nand: NAND device * @offs: absolute offset * @req: MTD request * @iter: NAND I/O iterator * * Initializes a NAND iterator based on the information passed by the MTD - * layer. + * layer for page jumps. */ -static inline void nanddev_io_iter_init(struct nand_device *nand, - enum nand_page_io_req_type reqtype, - loff_t offs, struct mtd_oob_ops *req, - struct nand_io_iter *iter) +static inline void nanddev_io_page_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) { struct mtd_info *mtd = nanddev_to_mtd(nand); @@ -937,6 +940,43 @@ static inline void nanddev_io_iter_init(struct nand_device *nand, iter->req.ooblen = min_t(unsigned int, iter->oobbytes_per_page - iter->req.ooboffs, iter->oobleft); + iter->req.continuous = false; +} + +/** + * nand_io_block_iter_init - Initialize a NAND I/O iterator + * @nand: NAND device + * @offs: absolute offset + * @req: MTD request + * @iter: NAND I/O iterator + * + * Initializes a NAND iterator based on the information passed by the MTD + * layer for block jumps (no OOB) + * + * In practice only reads may leverage this iterator. + */ +static inline void nanddev_io_block_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, + loff_t offs, struct mtd_oob_ops *req, + struct nand_io_iter *iter) +{ + unsigned int offs_in_eb; + + iter->req.type = reqtype; + iter->req.mode = req->mode; + iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos); + iter->req.ooboffs = 0; + iter->oobbytes_per_page = 0; + iter->dataleft = req->len; + iter->oobleft = 0; + iter->req.databuf.in = req->datbuf; + offs_in_eb = (nand->memorg.pagesize * iter->req.pos.page) + iter->req.dataoffs; + iter->req.datalen = min_t(unsigned int, + nanddev_eraseblock_size(nand) - offs_in_eb, + iter->dataleft); + iter->req.oobbuf.in = NULL; + iter->req.ooblen = 0; + iter->req.continuous = true; } /** @@ -963,6 +1003,25 @@ static inline void nanddev_io_iter_next_page(struct nand_device *nand, } /** + * nand_io_iter_next_block - Move to the next block + * @nand: NAND device + * @iter: NAND I/O iterator + * + * Updates the @iter to point to the next block. + * No OOB handling available. + */ +static inline void nanddev_io_iter_next_block(struct nand_device *nand, + struct nand_io_iter *iter) +{ + nanddev_pos_next_eraseblock(nand, &iter->req.pos); + iter->dataleft -= iter->req.datalen; + iter->req.databuf.in += iter->req.datalen; + iter->req.dataoffs = 0; + iter->req.datalen = min_t(unsigned int, nanddev_eraseblock_size(nand), + iter->dataleft); +} + +/** * nand_io_iter_end - Should end iteration or not * @nand: NAND device * @iter: NAND I/O iterator @@ -990,13 +1049,28 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, * @req: MTD I/O request * @iter: NAND I/O iterator * - * Should be used for iterate over pages that are contained in an MTD request. + * Should be used for iterating over pages that are contained in an MTD request. */ #define nanddev_io_for_each_page(nand, type, start, req, iter) \ - for (nanddev_io_iter_init(nand, type, start, req, iter); \ + for (nanddev_io_page_iter_init(nand, type, start, req, iter); \ !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) +/** + * nand_io_for_each_block - Iterate over all NAND pages contained in an MTD I/O + * request, one block at a time + * @nand: NAND device + * @start: start address to read/write from + * @req: MTD I/O request + * @iter: NAND I/O iterator + * + * Should be used for iterating over blocks that are contained in an MTD request. + */ +#define nanddev_io_for_each_block(nand, type, start, req, iter) \ + for (nanddev_io_block_iter_init(nand, type, start, req, iter); \ + !nanddev_io_iter_end(nand, iter); \ + nanddev_io_iter_next_block(nand, iter)) + bool nanddev_isbad(struct nand_device *nand, const struct nand_pos *pos); bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 5c19ead60499..702e5fb13dae 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -312,6 +312,8 @@ struct spinand_ecc_info { #define SPINAND_HAS_QE_BIT BIT(0) #define SPINAND_HAS_CR_FEAT_BIT BIT(1) +#define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) +#define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure @@ -336,6 +338,7 @@ struct spinand_ondie_ecc_conf { * @op_variants.update_cache: variants of the update-cache operation * @select_target: function used to select a target/die. Required only for * multi-die chips + * @set_cont_read: enable/disable continuous cached reads * * Each SPI NAND manufacturer driver should have a spinand_info table * describing all the chips supported by the driver. @@ -354,6 +357,8 @@ struct spinand_info { } op_variants; int (*select_target)(struct spinand_device *spinand, unsigned int target); + int (*set_cont_read)(struct spinand_device *spinand, + bool enable); }; #define SPINAND_ID(__method, ...) \ @@ -379,6 +384,9 @@ struct spinand_info { #define SPINAND_SELECT_TARGET(__func) \ .select_target = __func, +#define SPINAND_CONT_READ(__set_cont_read) \ + .set_cont_read = __set_cont_read, + #define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ __flags, ...) \ { \ @@ -422,6 +430,12 @@ struct spinand_dirmap { * passed in spi_mem_op be DMA-able, so we can't based the bufs on * the stack * @manufacturer: SPI NAND manufacturer information + * @cont_read_possible: Field filled by the core once the whole system + * configuration is known to tell whether continuous reads are + * suitable to use or not in general with this chip/configuration. + * A per-transfer check must of course be done to ensure it is + * actually relevant to enable this feature. + * @set_cont_read: Enable/disable the continuous read feature * @priv: manufacturer private data */ struct spinand_device { @@ -451,6 +465,10 @@ struct spinand_device { u8 *scratchbuf; const struct spinand_manufacturer *manufacturer; void *priv; + + bool cont_read_possible; + int (*set_cont_read)(struct spinand_device *spinand, + bool enable); }; /** @@ -517,6 +535,7 @@ int spinand_match_and_init(struct spinand_device *spinand, enum spinand_readid_method rdid_method); int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); +int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); #endif /* __LINUX_MTD_SPINAND_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a561c629d89f..2bf91b57591b 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -49,7 +49,6 @@ static inline void mutex_destroy(struct mutex *lock) {} #endif -#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -65,6 +64,18 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) +/** + * mutex_init_with_key - initialize a mutex with a given lockdep key + * @mutex: the mutex to be initialized + * @key: the lockdep key to be associated with the mutex + * + * Initialize the mutex to the unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ +#define mutex_init_with_key(mutex, key) __mutex_init((mutex), #mutex, (key)) + +#ifndef CONFIG_PREEMPT_RT #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ @@ -111,12 +122,6 @@ do { \ __mutex_rt_init((mutex), name, key); \ } while (0) -#define mutex_init(mutex) \ -do { \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key); \ -} while (0) #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_DEBUG_MUTEXES diff --git a/include/linux/mv643xx.h b/include/linux/mv643xx.h deleted file mode 100644 index 000b126acfb6..000000000000 --- a/include/linux/mv643xx.h +++ /dev/null @@ -1,921 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mv643xx.h - MV-643XX Internal registers definition file. - * - * Copyright 2002 Momentum Computer, Inc. - * Author: Matthew Dharm <mdharm@momenco.com> - * Copyright 2002 GALILEO TECHNOLOGY, LTD. - */ -#ifndef __ASM_MV643XX_H -#define __ASM_MV643XX_H - -#include <asm/types.h> -#include <linux/mv643xx_eth.h> -#include <linux/mv643xx_i2c.h> - -/****************************************/ -/* Processor Address Space */ -/****************************************/ - -/* DDR SDRAM BAR and size registers */ - -#define MV64340_CS_0_BASE_ADDR 0x008 -#define MV64340_CS_0_SIZE 0x010 -#define MV64340_CS_1_BASE_ADDR 0x208 -#define MV64340_CS_1_SIZE 0x210 -#define MV64340_CS_2_BASE_ADDR 0x018 -#define MV64340_CS_2_SIZE 0x020 -#define MV64340_CS_3_BASE_ADDR 0x218 -#define MV64340_CS_3_SIZE 0x220 - -/* Devices BAR and size registers */ - -#define MV64340_DEV_CS0_BASE_ADDR 0x028 -#define MV64340_DEV_CS0_SIZE 0x030 -#define MV64340_DEV_CS1_BASE_ADDR 0x228 -#define MV64340_DEV_CS1_SIZE 0x230 -#define MV64340_DEV_CS2_BASE_ADDR 0x248 -#define MV64340_DEV_CS2_SIZE 0x250 -#define MV64340_DEV_CS3_BASE_ADDR 0x038 -#define MV64340_DEV_CS3_SIZE 0x040 -#define MV64340_BOOTCS_BASE_ADDR 0x238 -#define MV64340_BOOTCS_SIZE 0x240 - -/* PCI 0 BAR and size registers */ - -#define MV64340_PCI_0_IO_BASE_ADDR 0x048 -#define MV64340_PCI_0_IO_SIZE 0x050 -#define MV64340_PCI_0_MEMORY0_BASE_ADDR 0x058 -#define MV64340_PCI_0_MEMORY0_SIZE 0x060 -#define MV64340_PCI_0_MEMORY1_BASE_ADDR 0x080 -#define MV64340_PCI_0_MEMORY1_SIZE 0x088 -#define MV64340_PCI_0_MEMORY2_BASE_ADDR 0x258 -#define MV64340_PCI_0_MEMORY2_SIZE 0x260 -#define MV64340_PCI_0_MEMORY3_BASE_ADDR 0x280 -#define MV64340_PCI_0_MEMORY3_SIZE 0x288 - -/* PCI 1 BAR and size registers */ -#define MV64340_PCI_1_IO_BASE_ADDR 0x090 -#define MV64340_PCI_1_IO_SIZE 0x098 -#define MV64340_PCI_1_MEMORY0_BASE_ADDR 0x0a0 -#define MV64340_PCI_1_MEMORY0_SIZE 0x0a8 -#define MV64340_PCI_1_MEMORY1_BASE_ADDR 0x0b0 -#define MV64340_PCI_1_MEMORY1_SIZE 0x0b8 -#define MV64340_PCI_1_MEMORY2_BASE_ADDR 0x2a0 -#define MV64340_PCI_1_MEMORY2_SIZE 0x2a8 -#define MV64340_PCI_1_MEMORY3_BASE_ADDR 0x2b0 -#define MV64340_PCI_1_MEMORY3_SIZE 0x2b8 - -/* SRAM base address */ -#define MV64340_INTEGRATED_SRAM_BASE_ADDR 0x268 - -/* internal registers space base address */ -#define MV64340_INTERNAL_SPACE_BASE_ADDR 0x068 - -/* Enables the CS , DEV_CS , PCI 0 and PCI 1 - windows above */ -#define MV64340_BASE_ADDR_ENABLE 0x278 - -/****************************************/ -/* PCI remap registers */ -/****************************************/ - /* PCI 0 */ -#define MV64340_PCI_0_IO_ADDR_REMAP 0x0f0 -#define MV64340_PCI_0_MEMORY0_LOW_ADDR_REMAP 0x0f8 -#define MV64340_PCI_0_MEMORY0_HIGH_ADDR_REMAP 0x320 -#define MV64340_PCI_0_MEMORY1_LOW_ADDR_REMAP 0x100 -#define MV64340_PCI_0_MEMORY1_HIGH_ADDR_REMAP 0x328 -#define MV64340_PCI_0_MEMORY2_LOW_ADDR_REMAP 0x2f8 -#define MV64340_PCI_0_MEMORY2_HIGH_ADDR_REMAP 0x330 -#define MV64340_PCI_0_MEMORY3_LOW_ADDR_REMAP 0x300 -#define MV64340_PCI_0_MEMORY3_HIGH_ADDR_REMAP 0x338 - /* PCI 1 */ -#define MV64340_PCI_1_IO_ADDR_REMAP 0x108 -#define MV64340_PCI_1_MEMORY0_LOW_ADDR_REMAP 0x110 -#define MV64340_PCI_1_MEMORY0_HIGH_ADDR_REMAP 0x340 -#define MV64340_PCI_1_MEMORY1_LOW_ADDR_REMAP 0x118 -#define MV64340_PCI_1_MEMORY1_HIGH_ADDR_REMAP 0x348 -#define MV64340_PCI_1_MEMORY2_LOW_ADDR_REMAP 0x310 -#define MV64340_PCI_1_MEMORY2_HIGH_ADDR_REMAP 0x350 -#define MV64340_PCI_1_MEMORY3_LOW_ADDR_REMAP 0x318 -#define MV64340_PCI_1_MEMORY3_HIGH_ADDR_REMAP 0x358 - -#define MV64340_CPU_PCI_0_HEADERS_RETARGET_CONTROL 0x3b0 -#define MV64340_CPU_PCI_0_HEADERS_RETARGET_BASE 0x3b8 -#define MV64340_CPU_PCI_1_HEADERS_RETARGET_CONTROL 0x3c0 -#define MV64340_CPU_PCI_1_HEADERS_RETARGET_BASE 0x3c8 -#define MV64340_CPU_GE_HEADERS_RETARGET_CONTROL 0x3d0 -#define MV64340_CPU_GE_HEADERS_RETARGET_BASE 0x3d8 -#define MV64340_CPU_IDMA_HEADERS_RETARGET_CONTROL 0x3e0 -#define MV64340_CPU_IDMA_HEADERS_RETARGET_BASE 0x3e8 - -/****************************************/ -/* CPU Control Registers */ -/****************************************/ - -#define MV64340_CPU_CONFIG 0x000 -#define MV64340_CPU_MODE 0x120 -#define MV64340_CPU_MASTER_CONTROL 0x160 -#define MV64340_CPU_CROSS_BAR_CONTROL_LOW 0x150 -#define MV64340_CPU_CROSS_BAR_CONTROL_HIGH 0x158 -#define MV64340_CPU_CROSS_BAR_TIMEOUT 0x168 - -/****************************************/ -/* SMP RegisterS */ -/****************************************/ - -#define MV64340_SMP_WHO_AM_I 0x200 -#define MV64340_SMP_CPU0_DOORBELL 0x214 -#define MV64340_SMP_CPU0_DOORBELL_CLEAR 0x21C -#define MV64340_SMP_CPU1_DOORBELL 0x224 -#define MV64340_SMP_CPU1_DOORBELL_CLEAR 0x22C -#define MV64340_SMP_CPU0_DOORBELL_MASK 0x234 -#define MV64340_SMP_CPU1_DOORBELL_MASK 0x23C -#define MV64340_SMP_SEMAPHOR0 0x244 -#define MV64340_SMP_SEMAPHOR1 0x24c -#define MV64340_SMP_SEMAPHOR2 0x254 -#define MV64340_SMP_SEMAPHOR3 0x25c -#define MV64340_SMP_SEMAPHOR4 0x264 -#define MV64340_SMP_SEMAPHOR5 0x26c -#define MV64340_SMP_SEMAPHOR6 0x274 -#define MV64340_SMP_SEMAPHOR7 0x27c - -/****************************************/ -/* CPU Sync Barrier Register */ -/****************************************/ - -#define MV64340_CPU_0_SYNC_BARRIER_TRIGGER 0x0c0 -#define MV64340_CPU_0_SYNC_BARRIER_VIRTUAL 0x0c8 -#define MV64340_CPU_1_SYNC_BARRIER_TRIGGER 0x0d0 -#define MV64340_CPU_1_SYNC_BARRIER_VIRTUAL 0x0d8 - -/****************************************/ -/* CPU Access Protect */ -/****************************************/ - -#define MV64340_CPU_PROTECT_WINDOW_0_BASE_ADDR 0x180 -#define MV64340_CPU_PROTECT_WINDOW_0_SIZE 0x188 -#define MV64340_CPU_PROTECT_WINDOW_1_BASE_ADDR 0x190 -#define MV64340_CPU_PROTECT_WINDOW_1_SIZE 0x198 -#define MV64340_CPU_PROTECT_WINDOW_2_BASE_ADDR 0x1a0 -#define MV64340_CPU_PROTECT_WINDOW_2_SIZE 0x1a8 -#define MV64340_CPU_PROTECT_WINDOW_3_BASE_ADDR 0x1b0 -#define MV64340_CPU_PROTECT_WINDOW_3_SIZE 0x1b8 - - -/****************************************/ -/* CPU Error Report */ -/****************************************/ - -#define MV64340_CPU_ERROR_ADDR_LOW 0x070 -#define MV64340_CPU_ERROR_ADDR_HIGH 0x078 -#define MV64340_CPU_ERROR_DATA_LOW 0x128 -#define MV64340_CPU_ERROR_DATA_HIGH 0x130 -#define MV64340_CPU_ERROR_PARITY 0x138 -#define MV64340_CPU_ERROR_CAUSE 0x140 -#define MV64340_CPU_ERROR_MASK 0x148 - -/****************************************/ -/* CPU Interface Debug Registers */ -/****************************************/ - -#define MV64340_PUNIT_SLAVE_DEBUG_LOW 0x360 -#define MV64340_PUNIT_SLAVE_DEBUG_HIGH 0x368 -#define MV64340_PUNIT_MASTER_DEBUG_LOW 0x370 -#define MV64340_PUNIT_MASTER_DEBUG_HIGH 0x378 -#define MV64340_PUNIT_MMASK 0x3e4 - -/****************************************/ -/* Integrated SRAM Registers */ -/****************************************/ - -#define MV64340_SRAM_CONFIG 0x380 -#define MV64340_SRAM_TEST_MODE 0X3F4 -#define MV64340_SRAM_ERROR_CAUSE 0x388 -#define MV64340_SRAM_ERROR_ADDR 0x390 -#define MV64340_SRAM_ERROR_ADDR_HIGH 0X3F8 -#define MV64340_SRAM_ERROR_DATA_LOW 0x398 -#define MV64340_SRAM_ERROR_DATA_HIGH 0x3a0 -#define MV64340_SRAM_ERROR_DATA_PARITY 0x3a8 - -/****************************************/ -/* SDRAM Configuration */ -/****************************************/ - -#define MV64340_SDRAM_CONFIG 0x1400 -#define MV64340_D_UNIT_CONTROL_LOW 0x1404 -#define MV64340_D_UNIT_CONTROL_HIGH 0x1424 -#define MV64340_SDRAM_TIMING_CONTROL_LOW 0x1408 -#define MV64340_SDRAM_TIMING_CONTROL_HIGH 0x140c -#define MV64340_SDRAM_ADDR_CONTROL 0x1410 -#define MV64340_SDRAM_OPEN_PAGES_CONTROL 0x1414 -#define MV64340_SDRAM_OPERATION 0x1418 -#define MV64340_SDRAM_MODE 0x141c -#define MV64340_EXTENDED_DRAM_MODE 0x1420 -#define MV64340_SDRAM_CROSS_BAR_CONTROL_LOW 0x1430 -#define MV64340_SDRAM_CROSS_BAR_CONTROL_HIGH 0x1434 -#define MV64340_SDRAM_CROSS_BAR_TIMEOUT 0x1438 -#define MV64340_SDRAM_ADDR_CTRL_PADS_CALIBRATION 0x14c0 -#define MV64340_SDRAM_DATA_PADS_CALIBRATION 0x14c4 - -/****************************************/ -/* SDRAM Error Report */ -/****************************************/ - -#define MV64340_SDRAM_ERROR_DATA_LOW 0x1444 -#define MV64340_SDRAM_ERROR_DATA_HIGH 0x1440 -#define MV64340_SDRAM_ERROR_ADDR 0x1450 -#define MV64340_SDRAM_RECEIVED_ECC 0x1448 -#define MV64340_SDRAM_CALCULATED_ECC 0x144c -#define MV64340_SDRAM_ECC_CONTROL 0x1454 -#define MV64340_SDRAM_ECC_ERROR_COUNTER 0x1458 - -/******************************************/ -/* Controlled Delay Line (CDL) Registers */ -/******************************************/ - -#define MV64340_DFCDL_CONFIG0 0x1480 -#define MV64340_DFCDL_CONFIG1 0x1484 -#define MV64340_DLL_WRITE 0x1488 -#define MV64340_DLL_READ 0x148c -#define MV64340_SRAM_ADDR 0x1490 -#define MV64340_SRAM_DATA0 0x1494 -#define MV64340_SRAM_DATA1 0x1498 -#define MV64340_SRAM_DATA2 0x149c -#define MV64340_DFCL_PROBE 0x14a0 - -/******************************************/ -/* Debug Registers */ -/******************************************/ - -#define MV64340_DUNIT_DEBUG_LOW 0x1460 -#define MV64340_DUNIT_DEBUG_HIGH 0x1464 -#define MV64340_DUNIT_MMASK 0X1b40 - -/****************************************/ -/* Device Parameters */ -/****************************************/ - -#define MV64340_DEVICE_BANK0_PARAMETERS 0x45c -#define MV64340_DEVICE_BANK1_PARAMETERS 0x460 -#define MV64340_DEVICE_BANK2_PARAMETERS 0x464 -#define MV64340_DEVICE_BANK3_PARAMETERS 0x468 -#define MV64340_DEVICE_BOOT_BANK_PARAMETERS 0x46c -#define MV64340_DEVICE_INTERFACE_CONTROL 0x4c0 -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_LOW 0x4c8 -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_CONTROL_HIGH 0x4cc -#define MV64340_DEVICE_INTERFACE_CROSS_BAR_TIMEOUT 0x4c4 - -/****************************************/ -/* Device interrupt registers */ -/****************************************/ - -#define MV64340_DEVICE_INTERRUPT_CAUSE 0x4d0 -#define MV64340_DEVICE_INTERRUPT_MASK 0x4d4 -#define MV64340_DEVICE_ERROR_ADDR 0x4d8 -#define MV64340_DEVICE_ERROR_DATA 0x4dc -#define MV64340_DEVICE_ERROR_PARITY 0x4e0 - -/****************************************/ -/* Device debug registers */ -/****************************************/ - -#define MV64340_DEVICE_DEBUG_LOW 0x4e4 -#define MV64340_DEVICE_DEBUG_HIGH 0x4e8 -#define MV64340_RUNIT_MMASK 0x4f0 - -/****************************************/ -/* PCI Slave Address Decoding registers */ -/****************************************/ - -#define MV64340_PCI_0_CS_0_BANK_SIZE 0xc08 -#define MV64340_PCI_1_CS_0_BANK_SIZE 0xc88 -#define MV64340_PCI_0_CS_1_BANK_SIZE 0xd08 -#define MV64340_PCI_1_CS_1_BANK_SIZE 0xd88 -#define MV64340_PCI_0_CS_2_BANK_SIZE 0xc0c -#define MV64340_PCI_1_CS_2_BANK_SIZE 0xc8c -#define MV64340_PCI_0_CS_3_BANK_SIZE 0xd0c -#define MV64340_PCI_1_CS_3_BANK_SIZE 0xd8c -#define MV64340_PCI_0_DEVCS_0_BANK_SIZE 0xc10 -#define MV64340_PCI_1_DEVCS_0_BANK_SIZE 0xc90 -#define MV64340_PCI_0_DEVCS_1_BANK_SIZE 0xd10 -#define MV64340_PCI_1_DEVCS_1_BANK_SIZE 0xd90 -#define MV64340_PCI_0_DEVCS_2_BANK_SIZE 0xd18 -#define MV64340_PCI_1_DEVCS_2_BANK_SIZE 0xd98 -#define MV64340_PCI_0_DEVCS_3_BANK_SIZE 0xc14 -#define MV64340_PCI_1_DEVCS_3_BANK_SIZE 0xc94 -#define MV64340_PCI_0_DEVCS_BOOT_BANK_SIZE 0xd14 -#define MV64340_PCI_1_DEVCS_BOOT_BANK_SIZE 0xd94 -#define MV64340_PCI_0_P2P_MEM0_BAR_SIZE 0xd1c -#define MV64340_PCI_1_P2P_MEM0_BAR_SIZE 0xd9c -#define MV64340_PCI_0_P2P_MEM1_BAR_SIZE 0xd20 -#define MV64340_PCI_1_P2P_MEM1_BAR_SIZE 0xda0 -#define MV64340_PCI_0_P2P_I_O_BAR_SIZE 0xd24 -#define MV64340_PCI_1_P2P_I_O_BAR_SIZE 0xda4 -#define MV64340_PCI_0_CPU_BAR_SIZE 0xd28 -#define MV64340_PCI_1_CPU_BAR_SIZE 0xda8 -#define MV64340_PCI_0_INTERNAL_SRAM_BAR_SIZE 0xe00 -#define MV64340_PCI_1_INTERNAL_SRAM_BAR_SIZE 0xe80 -#define MV64340_PCI_0_EXPANSION_ROM_BAR_SIZE 0xd2c -#define MV64340_PCI_1_EXPANSION_ROM_BAR_SIZE 0xd9c -#define MV64340_PCI_0_BASE_ADDR_REG_ENABLE 0xc3c -#define MV64340_PCI_1_BASE_ADDR_REG_ENABLE 0xcbc -#define MV64340_PCI_0_CS_0_BASE_ADDR_REMAP 0xc48 -#define MV64340_PCI_1_CS_0_BASE_ADDR_REMAP 0xcc8 -#define MV64340_PCI_0_CS_1_BASE_ADDR_REMAP 0xd48 -#define MV64340_PCI_1_CS_1_BASE_ADDR_REMAP 0xdc8 -#define MV64340_PCI_0_CS_2_BASE_ADDR_REMAP 0xc4c -#define MV64340_PCI_1_CS_2_BASE_ADDR_REMAP 0xccc -#define MV64340_PCI_0_CS_3_BASE_ADDR_REMAP 0xd4c -#define MV64340_PCI_1_CS_3_BASE_ADDR_REMAP 0xdcc -#define MV64340_PCI_0_CS_0_BASE_HIGH_ADDR_REMAP 0xF04 -#define MV64340_PCI_1_CS_0_BASE_HIGH_ADDR_REMAP 0xF84 -#define MV64340_PCI_0_CS_1_BASE_HIGH_ADDR_REMAP 0xF08 -#define MV64340_PCI_1_CS_1_BASE_HIGH_ADDR_REMAP 0xF88 -#define MV64340_PCI_0_CS_2_BASE_HIGH_ADDR_REMAP 0xF0C -#define MV64340_PCI_1_CS_2_BASE_HIGH_ADDR_REMAP 0xF8C -#define MV64340_PCI_0_CS_3_BASE_HIGH_ADDR_REMAP 0xF10 -#define MV64340_PCI_1_CS_3_BASE_HIGH_ADDR_REMAP 0xF90 -#define MV64340_PCI_0_DEVCS_0_BASE_ADDR_REMAP 0xc50 -#define MV64340_PCI_1_DEVCS_0_BASE_ADDR_REMAP 0xcd0 -#define MV64340_PCI_0_DEVCS_1_BASE_ADDR_REMAP 0xd50 -#define MV64340_PCI_1_DEVCS_1_BASE_ADDR_REMAP 0xdd0 -#define MV64340_PCI_0_DEVCS_2_BASE_ADDR_REMAP 0xd58 -#define MV64340_PCI_1_DEVCS_2_BASE_ADDR_REMAP 0xdd8 -#define MV64340_PCI_0_DEVCS_3_BASE_ADDR_REMAP 0xc54 -#define MV64340_PCI_1_DEVCS_3_BASE_ADDR_REMAP 0xcd4 -#define MV64340_PCI_0_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xd54 -#define MV64340_PCI_1_DEVCS_BOOTCS_BASE_ADDR_REMAP 0xdd4 -#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xd5c -#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_LOW 0xddc -#define MV64340_PCI_0_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xd60 -#define MV64340_PCI_1_P2P_MEM0_BASE_ADDR_REMAP_HIGH 0xde0 -#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xd64 -#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_LOW 0xde4 -#define MV64340_PCI_0_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xd68 -#define MV64340_PCI_1_P2P_MEM1_BASE_ADDR_REMAP_HIGH 0xde8 -#define MV64340_PCI_0_P2P_I_O_BASE_ADDR_REMAP 0xd6c -#define MV64340_PCI_1_P2P_I_O_BASE_ADDR_REMAP 0xdec -#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_LOW 0xd70 -#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_LOW 0xdf0 -#define MV64340_PCI_0_CPU_BASE_ADDR_REMAP_HIGH 0xd74 -#define MV64340_PCI_1_CPU_BASE_ADDR_REMAP_HIGH 0xdf4 -#define MV64340_PCI_0_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf00 -#define MV64340_PCI_1_INTEGRATED_SRAM_BASE_ADDR_REMAP 0xf80 -#define MV64340_PCI_0_EXPANSION_ROM_BASE_ADDR_REMAP 0xf38 -#define MV64340_PCI_1_EXPANSION_ROM_BASE_ADDR_REMAP 0xfb8 -#define MV64340_PCI_0_ADDR_DECODE_CONTROL 0xd3c -#define MV64340_PCI_1_ADDR_DECODE_CONTROL 0xdbc -#define MV64340_PCI_0_HEADERS_RETARGET_CONTROL 0xF40 -#define MV64340_PCI_1_HEADERS_RETARGET_CONTROL 0xFc0 -#define MV64340_PCI_0_HEADERS_RETARGET_BASE 0xF44 -#define MV64340_PCI_1_HEADERS_RETARGET_BASE 0xFc4 -#define MV64340_PCI_0_HEADERS_RETARGET_HIGH 0xF48 -#define MV64340_PCI_1_HEADERS_RETARGET_HIGH 0xFc8 - -/***********************************/ -/* PCI Control Register Map */ -/***********************************/ - -#define MV64340_PCI_0_DLL_STATUS_AND_COMMAND 0x1d20 -#define MV64340_PCI_1_DLL_STATUS_AND_COMMAND 0x1da0 -#define MV64340_PCI_0_MPP_PADS_DRIVE_CONTROL 0x1d1C -#define MV64340_PCI_1_MPP_PADS_DRIVE_CONTROL 0x1d9C -#define MV64340_PCI_0_COMMAND 0xc00 -#define MV64340_PCI_1_COMMAND 0xc80 -#define MV64340_PCI_0_MODE 0xd00 -#define MV64340_PCI_1_MODE 0xd80 -#define MV64340_PCI_0_RETRY 0xc04 -#define MV64340_PCI_1_RETRY 0xc84 -#define MV64340_PCI_0_READ_BUFFER_DISCARD_TIMER 0xd04 -#define MV64340_PCI_1_READ_BUFFER_DISCARD_TIMER 0xd84 -#define MV64340_PCI_0_MSI_TRIGGER_TIMER 0xc38 -#define MV64340_PCI_1_MSI_TRIGGER_TIMER 0xcb8 -#define MV64340_PCI_0_ARBITER_CONTROL 0x1d00 -#define MV64340_PCI_1_ARBITER_CONTROL 0x1d80 -#define MV64340_PCI_0_CROSS_BAR_CONTROL_LOW 0x1d08 -#define MV64340_PCI_1_CROSS_BAR_CONTROL_LOW 0x1d88 -#define MV64340_PCI_0_CROSS_BAR_CONTROL_HIGH 0x1d0c -#define MV64340_PCI_1_CROSS_BAR_CONTROL_HIGH 0x1d8c -#define MV64340_PCI_0_CROSS_BAR_TIMEOUT 0x1d04 -#define MV64340_PCI_1_CROSS_BAR_TIMEOUT 0x1d84 -#define MV64340_PCI_0_SYNC_BARRIER_TRIGGER_REG 0x1D18 -#define MV64340_PCI_1_SYNC_BARRIER_TRIGGER_REG 0x1D98 -#define MV64340_PCI_0_SYNC_BARRIER_VIRTUAL_REG 0x1d10 -#define MV64340_PCI_1_SYNC_BARRIER_VIRTUAL_REG 0x1d90 -#define MV64340_PCI_0_P2P_CONFIG 0x1d14 -#define MV64340_PCI_1_P2P_CONFIG 0x1d94 - -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_LOW 0x1e00 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_0_HIGH 0x1e04 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_0 0x1e08 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_LOW 0x1e10 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_1_HIGH 0x1e14 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_1 0x1e18 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_LOW 0x1e20 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_2_HIGH 0x1e24 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_2 0x1e28 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_LOW 0x1e30 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_3_HIGH 0x1e34 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_3 0x1e38 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_LOW 0x1e40 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_4_HIGH 0x1e44 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_4 0x1e48 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_LOW 0x1e50 -#define MV64340_PCI_0_ACCESS_CONTROL_BASE_5_HIGH 0x1e54 -#define MV64340_PCI_0_ACCESS_CONTROL_SIZE_5 0x1e58 - -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_LOW 0x1e80 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_0_HIGH 0x1e84 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_0 0x1e88 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_LOW 0x1e90 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_1_HIGH 0x1e94 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_1 0x1e98 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_LOW 0x1ea0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_2_HIGH 0x1ea4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_2 0x1ea8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_LOW 0x1eb0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_3_HIGH 0x1eb4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_3 0x1eb8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_LOW 0x1ec0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_4_HIGH 0x1ec4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_4 0x1ec8 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_LOW 0x1ed0 -#define MV64340_PCI_1_ACCESS_CONTROL_BASE_5_HIGH 0x1ed4 -#define MV64340_PCI_1_ACCESS_CONTROL_SIZE_5 0x1ed8 - -/****************************************/ -/* PCI Configuration Access Registers */ -/****************************************/ - -#define MV64340_PCI_0_CONFIG_ADDR 0xcf8 -#define MV64340_PCI_0_CONFIG_DATA_VIRTUAL_REG 0xcfc -#define MV64340_PCI_1_CONFIG_ADDR 0xc78 -#define MV64340_PCI_1_CONFIG_DATA_VIRTUAL_REG 0xc7c -#define MV64340_PCI_0_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xc34 -#define MV64340_PCI_1_INTERRUPT_ACKNOWLEDGE_VIRTUAL_REG 0xcb4 - -/****************************************/ -/* PCI Error Report Registers */ -/****************************************/ - -#define MV64340_PCI_0_SERR_MASK 0xc28 -#define MV64340_PCI_1_SERR_MASK 0xca8 -#define MV64340_PCI_0_ERROR_ADDR_LOW 0x1d40 -#define MV64340_PCI_1_ERROR_ADDR_LOW 0x1dc0 -#define MV64340_PCI_0_ERROR_ADDR_HIGH 0x1d44 -#define MV64340_PCI_1_ERROR_ADDR_HIGH 0x1dc4 -#define MV64340_PCI_0_ERROR_ATTRIBUTE 0x1d48 -#define MV64340_PCI_1_ERROR_ATTRIBUTE 0x1dc8 -#define MV64340_PCI_0_ERROR_COMMAND 0x1d50 -#define MV64340_PCI_1_ERROR_COMMAND 0x1dd0 -#define MV64340_PCI_0_ERROR_CAUSE 0x1d58 -#define MV64340_PCI_1_ERROR_CAUSE 0x1dd8 -#define MV64340_PCI_0_ERROR_MASK 0x1d5c -#define MV64340_PCI_1_ERROR_MASK 0x1ddc - -/****************************************/ -/* PCI Debug Registers */ -/****************************************/ - -#define MV64340_PCI_0_MMASK 0X1D24 -#define MV64340_PCI_1_MMASK 0X1DA4 - -/*********************************************/ -/* PCI Configuration, Function 0, Registers */ -/*********************************************/ - -#define MV64340_PCI_DEVICE_AND_VENDOR_ID 0x000 -#define MV64340_PCI_STATUS_AND_COMMAND 0x004 -#define MV64340_PCI_CLASS_CODE_AND_REVISION_ID 0x008 -#define MV64340_PCI_BIST_HEADER_TYPE_LATENCY_TIMER_CACHE_LINE 0x00C - -#define MV64340_PCI_SCS_0_BASE_ADDR_LOW 0x010 -#define MV64340_PCI_SCS_0_BASE_ADDR_HIGH 0x014 -#define MV64340_PCI_SCS_1_BASE_ADDR_LOW 0x018 -#define MV64340_PCI_SCS_1_BASE_ADDR_HIGH 0x01C -#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_LOW 0x020 -#define MV64340_PCI_INTERNAL_REG_MEM_MAPPED_BASE_ADDR_HIGH 0x024 -#define MV64340_PCI_SUBSYSTEM_ID_AND_SUBSYSTEM_VENDOR_ID 0x02c -#define MV64340_PCI_EXPANSION_ROM_BASE_ADDR_REG 0x030 -#define MV64340_PCI_CAPABILTY_LIST_POINTER 0x034 -#define MV64340_PCI_INTERRUPT_PIN_AND_LINE 0x03C - /* capability list */ -#define MV64340_PCI_POWER_MANAGEMENT_CAPABILITY 0x040 -#define MV64340_PCI_POWER_MANAGEMENT_STATUS_AND_CONTROL 0x044 -#define MV64340_PCI_VPD_ADDR 0x048 -#define MV64340_PCI_VPD_DATA 0x04c -#define MV64340_PCI_MSI_MESSAGE_CONTROL 0x050 -#define MV64340_PCI_MSI_MESSAGE_ADDR 0x054 -#define MV64340_PCI_MSI_MESSAGE_UPPER_ADDR 0x058 -#define MV64340_PCI_MSI_MESSAGE_DATA 0x05c -#define MV64340_PCI_X_COMMAND 0x060 -#define MV64340_PCI_X_STATUS 0x064 -#define MV64340_PCI_COMPACT_PCI_HOT_SWAP 0x068 - -/***********************************************/ -/* PCI Configuration, Function 1, Registers */ -/***********************************************/ - -#define MV64340_PCI_SCS_2_BASE_ADDR_LOW 0x110 -#define MV64340_PCI_SCS_2_BASE_ADDR_HIGH 0x114 -#define MV64340_PCI_SCS_3_BASE_ADDR_LOW 0x118 -#define MV64340_PCI_SCS_3_BASE_ADDR_HIGH 0x11c -#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_LOW 0x120 -#define MV64340_PCI_INTERNAL_SRAM_BASE_ADDR_HIGH 0x124 - -/***********************************************/ -/* PCI Configuration, Function 2, Registers */ -/***********************************************/ - -#define MV64340_PCI_DEVCS_0_BASE_ADDR_LOW 0x210 -#define MV64340_PCI_DEVCS_0_BASE_ADDR_HIGH 0x214 -#define MV64340_PCI_DEVCS_1_BASE_ADDR_LOW 0x218 -#define MV64340_PCI_DEVCS_1_BASE_ADDR_HIGH 0x21c -#define MV64340_PCI_DEVCS_2_BASE_ADDR_LOW 0x220 -#define MV64340_PCI_DEVCS_2_BASE_ADDR_HIGH 0x224 - -/***********************************************/ -/* PCI Configuration, Function 3, Registers */ -/***********************************************/ - -#define MV64340_PCI_DEVCS_3_BASE_ADDR_LOW 0x310 -#define MV64340_PCI_DEVCS_3_BASE_ADDR_HIGH 0x314 -#define MV64340_PCI_BOOT_CS_BASE_ADDR_LOW 0x318 -#define MV64340_PCI_BOOT_CS_BASE_ADDR_HIGH 0x31c -#define MV64340_PCI_CPU_BASE_ADDR_LOW 0x220 -#define MV64340_PCI_CPU_BASE_ADDR_HIGH 0x224 - -/***********************************************/ -/* PCI Configuration, Function 4, Registers */ -/***********************************************/ - -#define MV64340_PCI_P2P_MEM0_BASE_ADDR_LOW 0x410 -#define MV64340_PCI_P2P_MEM0_BASE_ADDR_HIGH 0x414 -#define MV64340_PCI_P2P_MEM1_BASE_ADDR_LOW 0x418 -#define MV64340_PCI_P2P_MEM1_BASE_ADDR_HIGH 0x41c -#define MV64340_PCI_P2P_I_O_BASE_ADDR 0x420 -#define MV64340_PCI_INTERNAL_REGS_I_O_MAPPED_BASE_ADDR 0x424 - -/****************************************/ -/* Messaging Unit Registers (I20) */ -/****************************************/ - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_0_SIDE 0x010 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_0_SIDE 0x014 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_0_SIDE 0x018 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_0_SIDE 0x01C -#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_0_SIDE 0x020 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x024 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x028 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_0_SIDE 0x02C -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_0_SIDE 0x030 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_0_SIDE 0x034 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x040 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_0_SIDE 0x044 -#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_0_SIDE 0x050 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_0_SIDE 0x054 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x060 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x064 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x068 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x06C -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_0_SIDE 0x070 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_0_SIDE 0x074 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_0_SIDE 0x0F8 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_0_SIDE 0x0FC - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_PCI_1_SIDE 0x090 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_PCI_1_SIDE 0x094 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_PCI_1_SIDE 0x098 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_PCI_1_SIDE 0x09C -#define MV64340_I2O_INBOUND_DOORBELL_REG_PCI_1_SIDE 0x0A0 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0A4 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0A8 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_PCI_1_SIDE 0x0AC -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_PCI_1_SIDE 0x0B0 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_PCI_1_SIDE 0x0B4 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C0 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_PCI_1_SIDE 0x0C4 -#define MV64340_I2O_QUEUE_CONTROL_REG_PCI_1_SIDE 0x0D0 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_PCI_1_SIDE 0x0D4 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0E0 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0E4 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x0E8 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x0EC -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_PCI_1_SIDE 0x0F0 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_PCI_1_SIDE 0x0F4 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_PCI_1_SIDE 0x078 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_PCI_1_SIDE 0x07C - -#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C10 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C14 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU0_SIDE 0x1C18 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU0_SIDE 0x1C1C -#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU0_SIDE 0x1C20 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C24 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C28 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU0_SIDE 0x1C2C -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU0_SIDE 0x1C30 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU0_SIDE 0x1C34 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C40 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU0_SIDE 0x1C44 -#define MV64340_I2O_QUEUE_CONTROL_REG_CPU0_SIDE 0x1C50 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU0_SIDE 0x1C54 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C60 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C64 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1C68 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1C6C -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU0_SIDE 0x1C70 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU0_SIDE 0x1C74 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU0_SIDE 0x1CF8 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU0_SIDE 0x1CFC -#define MV64340_I2O_INBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C90 -#define MV64340_I2O_INBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C94 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG0_CPU1_SIDE 0x1C98 -#define MV64340_I2O_OUTBOUND_MESSAGE_REG1_CPU1_SIDE 0x1C9C -#define MV64340_I2O_INBOUND_DOORBELL_REG_CPU1_SIDE 0x1CA0 -#define MV64340_I2O_INBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CA4 -#define MV64340_I2O_INBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CA8 -#define MV64340_I2O_OUTBOUND_DOORBELL_REG_CPU1_SIDE 0x1CAC -#define MV64340_I2O_OUTBOUND_INTERRUPT_CAUSE_REG_CPU1_SIDE 0x1CB0 -#define MV64340_I2O_OUTBOUND_INTERRUPT_MASK_REG_CPU1_SIDE 0x1CB4 -#define MV64340_I2O_INBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC0 -#define MV64340_I2O_OUTBOUND_QUEUE_PORT_VIRTUAL_REG_CPU1_SIDE 0x1CC4 -#define MV64340_I2O_QUEUE_CONTROL_REG_CPU1_SIDE 0x1CD0 -#define MV64340_I2O_QUEUE_BASE_ADDR_REG_CPU1_SIDE 0x1CD4 -#define MV64340_I2O_INBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CE0 -#define MV64340_I2O_INBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CE4 -#define MV64340_I2O_INBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1CE8 -#define MV64340_I2O_INBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1CEC -#define MV64340_I2O_OUTBOUND_FREE_HEAD_POINTER_REG_CPU1_SIDE 0x1CF0 -#define MV64340_I2O_OUTBOUND_FREE_TAIL_POINTER_REG_CPU1_SIDE 0x1CF4 -#define MV64340_I2O_OUTBOUND_POST_HEAD_POINTER_REG_CPU1_SIDE 0x1C78 -#define MV64340_I2O_OUTBOUND_POST_TAIL_POINTER_REG_CPU1_SIDE 0x1C7C - -/****************************************/ -/* Ethernet Unit Registers */ -/****************************************/ - -/*******************************************/ -/* CUNIT Registers */ -/*******************************************/ - - /* Address Decoding Register Map */ - -#define MV64340_CUNIT_BASE_ADDR_REG0 0xf200 -#define MV64340_CUNIT_BASE_ADDR_REG1 0xf208 -#define MV64340_CUNIT_BASE_ADDR_REG2 0xf210 -#define MV64340_CUNIT_BASE_ADDR_REG3 0xf218 -#define MV64340_CUNIT_SIZE0 0xf204 -#define MV64340_CUNIT_SIZE1 0xf20c -#define MV64340_CUNIT_SIZE2 0xf214 -#define MV64340_CUNIT_SIZE3 0xf21c -#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG0 0xf240 -#define MV64340_CUNIT_HIGH_ADDR_REMAP_REG1 0xf244 -#define MV64340_CUNIT_BASE_ADDR_ENABLE_REG 0xf250 -#define MV64340_MPSC0_ACCESS_PROTECTION_REG 0xf254 -#define MV64340_MPSC1_ACCESS_PROTECTION_REG 0xf258 -#define MV64340_CUNIT_INTERNAL_SPACE_BASE_ADDR_REG 0xf25C - - /* Error Report Registers */ - -#define MV64340_CUNIT_INTERRUPT_CAUSE_REG 0xf310 -#define MV64340_CUNIT_INTERRUPT_MASK_REG 0xf314 -#define MV64340_CUNIT_ERROR_ADDR 0xf318 - - /* Cunit Control Registers */ - -#define MV64340_CUNIT_ARBITER_CONTROL_REG 0xf300 -#define MV64340_CUNIT_CONFIG_REG 0xb40c -#define MV64340_CUNIT_CRROSBAR_TIMEOUT_REG 0xf304 - - /* Cunit Debug Registers */ - -#define MV64340_CUNIT_DEBUG_LOW 0xf340 -#define MV64340_CUNIT_DEBUG_HIGH 0xf344 -#define MV64340_CUNIT_MMASK 0xf380 - - /* MPSCs Clocks Routing Registers */ - -#define MV64340_MPSC_ROUTING_REG 0xb400 -#define MV64340_MPSC_RX_CLOCK_ROUTING_REG 0xb404 -#define MV64340_MPSC_TX_CLOCK_ROUTING_REG 0xb408 - - /* MPSCs Interrupts Registers */ - -#define MV64340_MPSC_CAUSE_REG(port) (0xb804 + (port<<3)) -#define MV64340_MPSC_MASK_REG(port) (0xb884 + (port<<3)) - -#define MV64340_MPSC_MAIN_CONFIG_LOW(port) (0x8000 + (port<<12)) -#define MV64340_MPSC_MAIN_CONFIG_HIGH(port) (0x8004 + (port<<12)) -#define MV64340_MPSC_PROTOCOL_CONFIG(port) (0x8008 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG1(port) (0x800c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG2(port) (0x8010 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG3(port) (0x8014 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG4(port) (0x8018 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG5(port) (0x801c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG6(port) (0x8020 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG7(port) (0x8024 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG8(port) (0x8028 + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG9(port) (0x802c + (port<<12)) -#define MV64340_MPSC_CHANNEL_REG10(port) (0x8030 + (port<<12)) - - /* MPSC0 Registers */ - - -/***************************************/ -/* SDMA Registers */ -/***************************************/ - -#define MV64340_SDMA_CONFIG_REG(channel) (0x4000 + (channel<<13)) -#define MV64340_SDMA_COMMAND_REG(channel) (0x4008 + (channel<<13)) -#define MV64340_SDMA_CURRENT_RX_DESCRIPTOR_POINTER(channel) (0x4810 + (channel<<13)) -#define MV64340_SDMA_CURRENT_TX_DESCRIPTOR_POINTER(channel) (0x4c10 + (channel<<13)) -#define MV64340_SDMA_FIRST_TX_DESCRIPTOR_POINTER(channel) (0x4c14 + (channel<<13)) - -#define MV64340_SDMA_CAUSE_REG 0xb800 -#define MV64340_SDMA_MASK_REG 0xb880 - -/* BRG Interrupts */ - -#define MV64340_BRG_CONFIG_REG(brg) (0xb200 + (brg<<3)) -#define MV64340_BRG_BAUDE_TUNING_REG(brg) (0xb208 + (brg<<3)) -#define MV64340_BRG_CAUSE_REG 0xb834 -#define MV64340_BRG_MASK_REG 0xb8b4 - -/****************************************/ -/* DMA Channel Control */ -/****************************************/ - -#define MV64340_DMA_CHANNEL0_CONTROL 0x840 -#define MV64340_DMA_CHANNEL0_CONTROL_HIGH 0x880 -#define MV64340_DMA_CHANNEL1_CONTROL 0x844 -#define MV64340_DMA_CHANNEL1_CONTROL_HIGH 0x884 -#define MV64340_DMA_CHANNEL2_CONTROL 0x848 -#define MV64340_DMA_CHANNEL2_CONTROL_HIGH 0x888 -#define MV64340_DMA_CHANNEL3_CONTROL 0x84C -#define MV64340_DMA_CHANNEL3_CONTROL_HIGH 0x88C - - -/****************************************/ -/* IDMA Registers */ -/****************************************/ - -#define MV64340_DMA_CHANNEL0_BYTE_COUNT 0x800 -#define MV64340_DMA_CHANNEL1_BYTE_COUNT 0x804 -#define MV64340_DMA_CHANNEL2_BYTE_COUNT 0x808 -#define MV64340_DMA_CHANNEL3_BYTE_COUNT 0x80C -#define MV64340_DMA_CHANNEL0_SOURCE_ADDR 0x810 -#define MV64340_DMA_CHANNEL1_SOURCE_ADDR 0x814 -#define MV64340_DMA_CHANNEL2_SOURCE_ADDR 0x818 -#define MV64340_DMA_CHANNEL3_SOURCE_ADDR 0x81c -#define MV64340_DMA_CHANNEL0_DESTINATION_ADDR 0x820 -#define MV64340_DMA_CHANNEL1_DESTINATION_ADDR 0x824 -#define MV64340_DMA_CHANNEL2_DESTINATION_ADDR 0x828 -#define MV64340_DMA_CHANNEL3_DESTINATION_ADDR 0x82C -#define MV64340_DMA_CHANNEL0_NEXT_DESCRIPTOR_POINTER 0x830 -#define MV64340_DMA_CHANNEL1_NEXT_DESCRIPTOR_POINTER 0x834 -#define MV64340_DMA_CHANNEL2_NEXT_DESCRIPTOR_POINTER 0x838 -#define MV64340_DMA_CHANNEL3_NEXT_DESCRIPTOR_POINTER 0x83C -#define MV64340_DMA_CHANNEL0_CURRENT_DESCRIPTOR_POINTER 0x870 -#define MV64340_DMA_CHANNEL1_CURRENT_DESCRIPTOR_POINTER 0x874 -#define MV64340_DMA_CHANNEL2_CURRENT_DESCRIPTOR_POINTER 0x878 -#define MV64340_DMA_CHANNEL3_CURRENT_DESCRIPTOR_POINTER 0x87C - - /* IDMA Address Decoding Base Address Registers */ - -#define MV64340_DMA_BASE_ADDR_REG0 0xa00 -#define MV64340_DMA_BASE_ADDR_REG1 0xa08 -#define MV64340_DMA_BASE_ADDR_REG2 0xa10 -#define MV64340_DMA_BASE_ADDR_REG3 0xa18 -#define MV64340_DMA_BASE_ADDR_REG4 0xa20 -#define MV64340_DMA_BASE_ADDR_REG5 0xa28 -#define MV64340_DMA_BASE_ADDR_REG6 0xa30 -#define MV64340_DMA_BASE_ADDR_REG7 0xa38 - - /* IDMA Address Decoding Size Address Register */ - -#define MV64340_DMA_SIZE_REG0 0xa04 -#define MV64340_DMA_SIZE_REG1 0xa0c -#define MV64340_DMA_SIZE_REG2 0xa14 -#define MV64340_DMA_SIZE_REG3 0xa1c -#define MV64340_DMA_SIZE_REG4 0xa24 -#define MV64340_DMA_SIZE_REG5 0xa2c -#define MV64340_DMA_SIZE_REG6 0xa34 -#define MV64340_DMA_SIZE_REG7 0xa3C - - /* IDMA Address Decoding High Address Remap and Access - Protection Registers */ - -#define MV64340_DMA_HIGH_ADDR_REMAP_REG0 0xa60 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG1 0xa64 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG2 0xa68 -#define MV64340_DMA_HIGH_ADDR_REMAP_REG3 0xa6C -#define MV64340_DMA_BASE_ADDR_ENABLE_REG 0xa80 -#define MV64340_DMA_CHANNEL0_ACCESS_PROTECTION_REG 0xa70 -#define MV64340_DMA_CHANNEL1_ACCESS_PROTECTION_REG 0xa74 -#define MV64340_DMA_CHANNEL2_ACCESS_PROTECTION_REG 0xa78 -#define MV64340_DMA_CHANNEL3_ACCESS_PROTECTION_REG 0xa7c -#define MV64340_DMA_ARBITER_CONTROL 0x860 -#define MV64340_DMA_CROSS_BAR_TIMEOUT 0x8d0 - - /* IDMA Headers Retarget Registers */ - -#define MV64340_DMA_HEADERS_RETARGET_CONTROL 0xa84 -#define MV64340_DMA_HEADERS_RETARGET_BASE 0xa88 - - /* IDMA Interrupt Register */ - -#define MV64340_DMA_INTERRUPT_CAUSE_REG 0x8c0 -#define MV64340_DMA_INTERRUPT_CAUSE_MASK 0x8c4 -#define MV64340_DMA_ERROR_ADDR 0x8c8 -#define MV64340_DMA_ERROR_SELECT 0x8cc - - /* IDMA Debug Register ( for internal use ) */ - -#define MV64340_DMA_DEBUG_LOW 0x8e0 -#define MV64340_DMA_DEBUG_HIGH 0x8e4 -#define MV64340_DMA_SPARE 0xA8C - -/****************************************/ -/* Timer_Counter */ -/****************************************/ - -#define MV64340_TIMER_COUNTER0 0x850 -#define MV64340_TIMER_COUNTER1 0x854 -#define MV64340_TIMER_COUNTER2 0x858 -#define MV64340_TIMER_COUNTER3 0x85C -#define MV64340_TIMER_COUNTER_0_3_CONTROL 0x864 -#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_CAUSE 0x868 -#define MV64340_TIMER_COUNTER_0_3_INTERRUPT_MASK 0x86c - -/****************************************/ -/* Watchdog registers */ -/****************************************/ - -#define MV64340_WATCHDOG_CONFIG_REG 0xb410 -#define MV64340_WATCHDOG_VALUE_REG 0xb414 - -/****************************************/ -/* I2C Registers */ -/****************************************/ - -#define MV64XXX_I2C_OFFSET 0xc000 -#define MV64XXX_I2C_REG_BLOCK_SIZE 0x0020 - -/****************************************/ -/* GPP Interface Registers */ -/****************************************/ - -#define MV64340_GPP_IO_CONTROL 0xf100 -#define MV64340_GPP_LEVEL_CONTROL 0xf110 -#define MV64340_GPP_VALUE 0xf104 -#define MV64340_GPP_INTERRUPT_CAUSE 0xf108 -#define MV64340_GPP_INTERRUPT_MASK0 0xf10c -#define MV64340_GPP_INTERRUPT_MASK1 0xf114 -#define MV64340_GPP_VALUE_SET 0xf118 -#define MV64340_GPP_VALUE_CLEAR 0xf11c - -/****************************************/ -/* Interrupt Controller Registers */ -/****************************************/ - -/****************************************/ -/* Interrupts */ -/****************************************/ - -#define MV64340_MAIN_INTERRUPT_CAUSE_LOW 0x004 -#define MV64340_MAIN_INTERRUPT_CAUSE_HIGH 0x00c -#define MV64340_CPU_INTERRUPT0_MASK_LOW 0x014 -#define MV64340_CPU_INTERRUPT0_MASK_HIGH 0x01c -#define MV64340_CPU_INTERRUPT0_SELECT_CAUSE 0x024 -#define MV64340_CPU_INTERRUPT1_MASK_LOW 0x034 -#define MV64340_CPU_INTERRUPT1_MASK_HIGH 0x03c -#define MV64340_CPU_INTERRUPT1_SELECT_CAUSE 0x044 -#define MV64340_INTERRUPT0_MASK_0_LOW 0x054 -#define MV64340_INTERRUPT0_MASK_0_HIGH 0x05c -#define MV64340_INTERRUPT0_SELECT_CAUSE 0x064 -#define MV64340_INTERRUPT1_MASK_0_LOW 0x074 -#define MV64340_INTERRUPT1_MASK_0_HIGH 0x07c -#define MV64340_INTERRUPT1_SELECT_CAUSE 0x084 - -/****************************************/ -/* MPP Interface Registers */ -/****************************************/ - -#define MV64340_MPP_CONTROL0 0xf000 -#define MV64340_MPP_CONTROL1 0xf004 -#define MV64340_MPP_CONTROL2 0xf008 -#define MV64340_MPP_CONTROL3 0xf00c - -/****************************************/ -/* Serial Initialization registers */ -/****************************************/ - -#define MV64340_SERIAL_INIT_LAST_DATA 0xf324 -#define MV64340_SERIAL_INIT_CONTROL 0xf328 -#define MV64340_SERIAL_INIT_STATUS 0xf32c - -extern void mv64340_irq_init(unsigned int base); - -#endif /* __ASM_MV643XX_H */ diff --git a/include/linux/net.h b/include/linux/net.h index 688320b79fcc..b75bc534c1b3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -322,6 +322,25 @@ static inline bool sendpage_ok(struct page *page) return !PageSlab(page) && page_count(page) >= 1; } +/* + * Check sendpage_ok on contiguous pages. + */ +static inline bool sendpages_ok(struct page *page, size_t len, size_t offset) +{ + struct page *p = page + (offset >> PAGE_SHIFT); + size_t count = 0; + + while (count < len) { + if (!sendpage_ok(p)) + return false; + + p++; + count += PAGE_SIZE; + } + + return true; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 7c2d77d75a88..11be70a7929f 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -24,9 +24,8 @@ enum { NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */ NETIF_F_VLAN_CHALLENGED_BIT, /* Device cannot handle VLAN packets */ NETIF_F_GSO_BIT, /* Enable software GSO. */ - NETIF_F_LLTX_BIT, /* LockLess TX - deprecated. Please */ - /* do not use LLTX in new drivers */ - NETIF_F_NETNS_LOCAL_BIT, /* Does not change network namespaces */ + __UNUSED_NETIF_F_12, + __UNUSED_NETIF_F_13, NETIF_F_GRO_BIT, /* Generic receive offload */ NETIF_F_LRO_BIT, /* large receive offload */ @@ -59,7 +58,7 @@ enum { NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - NETIF_F_FCOE_MTU_BIT, /* Supports max FCoE MTU, 2158 bytes*/ + __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -106,7 +105,6 @@ enum { #define __NETIF_F(name) __NETIF_F_BIT(NETIF_F_##name##_BIT) #define NETIF_F_FCOE_CRC __NETIF_F(FCOE_CRC) -#define NETIF_F_FCOE_MTU __NETIF_F(FCOE_MTU) #define NETIF_F_FRAGLIST __NETIF_F(FRAGLIST) #define NETIF_F_FSO __NETIF_F(FSO) #define NETIF_F_GRO __NETIF_F(GRO) @@ -120,10 +118,8 @@ enum { #define NETIF_F_HW_VLAN_CTAG_TX __NETIF_F(HW_VLAN_CTAG_TX) #define NETIF_F_IP_CSUM __NETIF_F(IP_CSUM) #define NETIF_F_IPV6_CSUM __NETIF_F(IPV6_CSUM) -#define NETIF_F_LLTX __NETIF_F(LLTX) #define NETIF_F_LOOPBACK __NETIF_F(LOOPBACK) #define NETIF_F_LRO __NETIF_F(LRO) -#define NETIF_F_NETNS_LOCAL __NETIF_F(NETNS_LOCAL) #define NETIF_F_NOCACHE_COPY __NETIF_F(NOCACHE_COPY) #define NETIF_F_NTUPLE __NETIF_F(NTUPLE) #define NETIF_F_RXCSUM __NETIF_F(RXCSUM) @@ -192,8 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -#define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ - NETIF_F_LLTX | NETIF_F_NETNS_LOCAL) +#define NETIF_F_NEVER_CHANGE NETIF_F_VLAN_CHALLENGED /* remember that ((t)1 << t_BITS) is undefined in C99 */ #define NETIF_F_ETHTOOL_BITS ((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \ @@ -214,9 +209,6 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | \ NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) -#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ - NETIF_F_FSO) - /* List of features with software fallbacks. */ #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) @@ -261,4 +253,11 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) +static inline netdev_features_t netdev_base_features(netdev_features_t features) +{ + features &= ~NETIF_F_ONE_FOR_ALL; + features |= NETIF_F_ALL_FOR_ALL; + return features; +} + #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 607009150b5f..ecc686409161 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,7 +40,6 @@ #include <net/dcbnl.h> #endif #include <net/netprio_cgroup.h> - #include <linux/netdev_features.h> #include <linux/neighbour.h> #include <linux/netdevice_xmit.h> @@ -53,6 +52,7 @@ #include <net/net_trackers.h> #include <net/net_debug.h> #include <net/dropreason-core.h> +#include <net/neighbour_tables.h> struct netpoll_info; struct device; @@ -81,6 +81,7 @@ struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; +struct phy_link_topology; typedef u32 xdp_features_t; @@ -343,6 +344,16 @@ struct gro_list { #define GRO_HASH_BUCKETS 8 /* + * Structure for per-NAPI config + */ +struct napi_config { + u64 gro_flush_timeout; + u64 irq_suspend_timeout; + u32 defer_hard_irqs; + unsigned int napi_id; +}; + +/* * Structure for NAPI scheduling similar to tasklet but with weighting */ struct napi_struct { @@ -356,7 +367,7 @@ struct napi_struct { unsigned long state; int weight; - int defer_hard_irqs_count; + u32 defer_hard_irqs_count; unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL @@ -373,10 +384,15 @@ struct napi_struct { unsigned int napi_id; struct hrtimer timer; struct task_struct *thread; + unsigned long gro_flush_timeout; + unsigned long irq_suspend_timeout; + u32 defer_hard_irqs; /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + int index; + struct napi_config *config; }; enum { @@ -645,9 +661,6 @@ struct netdev_queue { #ifdef CONFIG_SYSFS struct kobject kobj; #endif -#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) - int numa_node; -#endif unsigned long tx_maxrate; /* * Number of TX timeouts for this queue @@ -660,13 +673,13 @@ struct netdev_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif - /* NAPI instance for the queue - * Readers and writers must hold RTNL - */ - struct napi_struct *napi; + /* * write-mostly part */ +#ifdef CONFIG_BQL + struct dql dql; +#endif spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* @@ -676,8 +689,16 @@ struct netdev_queue { unsigned long state; -#ifdef CONFIG_BQL - struct dql dql; +/* + * slow- / control-path part + */ + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; + +#if defined(CONFIG_XPS) && defined(CONFIG_NUMA) + int numa_node; #endif } ____cacheline_aligned_in_smp; @@ -1227,12 +1248,17 @@ struct netdev_net_notifier { * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, u16 flags, - * struct netlink_ext_ack *extack); + * bool *notified, struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. + * Callee shall set *notified to true if it sent any appropriate + * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid) - * Deletes the FDB entry from dev coresponding to addr. + * const unsigned char *addr, u16 vid + * bool *notified, struct netlink_ext_ack *extack); + * Deletes the FDB entry from dev corresponding to addr. + * Callee shall set *notified to true if it sent any appropriate + * notification(s). Otherwise core will send a generic one. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, @@ -1407,8 +1433,7 @@ struct net_device_ops { __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); - int (*ndo_netpoll_setup)(struct net_device *dev, - struct netpoll_info *info); + int (*ndo_netpoll_setup)(struct net_device *dev); void (*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, @@ -1505,12 +1530,15 @@ struct net_device_ops { const unsigned char *addr, u16 vid, u16 flags, + bool *notified, struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, - u16 vid, struct netlink_ext_ack *extack); + u16 vid, + bool *notified, + struct netlink_ext_ack *extack); int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); @@ -1598,6 +1626,14 @@ struct net_device_ops { int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_ops: Device shaping offload operations + * see include/net/net_shapers.h + */ + const struct net_shaper_ops *net_shaper_ops; +#endif }; /** @@ -1608,7 +1644,8 @@ struct net_device_ops { * userspace; this means that the order of these flags can change * during any kernel release. * - * You should have a pretty good reason to be extending these flags. + * You should add bitfield booleans after either net_device::priv_flags + * (hotpath) or ::threaded (slowpath) instead of extending these flags. * * @IFF_802_1Q_VLAN: 802.1Q VLAN device * @IFF_EBRIDGE: Ethernet bridging device @@ -1647,10 +1684,6 @@ struct net_device_ops { * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) - * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN - * @IFF_SEE_ALL_HWTSTAMP_REQUESTS: device wants to see calls to - * ndo_hwtstamp_set() for all timestamp requests regardless of source, - * even if those aren't HWTSTAMP_SOURCE_NETDEV. */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1685,42 +1718,8 @@ enum netdev_priv_flags { IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), - IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), - IFF_SEE_ALL_HWTSTAMP_REQUESTS = BIT_ULL(33), }; -#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN -#define IFF_EBRIDGE IFF_EBRIDGE -#define IFF_BONDING IFF_BONDING -#define IFF_ISATAP IFF_ISATAP -#define IFF_WAN_HDLC IFF_WAN_HDLC -#define IFF_XMIT_DST_RELEASE IFF_XMIT_DST_RELEASE -#define IFF_DONT_BRIDGE IFF_DONT_BRIDGE -#define IFF_DISABLE_NETPOLL IFF_DISABLE_NETPOLL -#define IFF_MACVLAN_PORT IFF_MACVLAN_PORT -#define IFF_BRIDGE_PORT IFF_BRIDGE_PORT -#define IFF_OVS_DATAPATH IFF_OVS_DATAPATH -#define IFF_TX_SKB_SHARING IFF_TX_SKB_SHARING -#define IFF_UNICAST_FLT IFF_UNICAST_FLT -#define IFF_TEAM_PORT IFF_TEAM_PORT -#define IFF_SUPP_NOFCS IFF_SUPP_NOFCS -#define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE -#define IFF_MACVLAN IFF_MACVLAN -#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM -#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER -#define IFF_NO_QUEUE IFF_NO_QUEUE -#define IFF_OPENVSWITCH IFF_OPENVSWITCH -#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE -#define IFF_TEAM IFF_TEAM -#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED -#define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM -#define IFF_MACSEC IFF_MACSEC -#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER -#define IFF_FAILOVER IFF_FAILOVER -#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE -#define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR - /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { ML_PRIV_NONE, @@ -1750,6 +1749,12 @@ enum netdev_reg_state { * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * + * @priv_flags: flags invisible to userspace defined as bits, see + * enum netdev_priv_flags for the definitions + * @lltx: device supports lockless Tx. Deprecated for real HW + * drivers. Mainly used by logical interfaces, such as + * bonding and tunnels + * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. @@ -1799,7 +1804,6 @@ enum netdev_reg_state { * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see <net/iw_handler.h> for details. - * @wireless_data: Instance data managed by the core of wireless extensions * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions @@ -1816,8 +1820,6 @@ enum netdev_reg_state { * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device - * @priv_flags: Like 'flags' but invisible to userspace, - * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) * @priv_len: Size of the ->priv flexible array * @priv: Flexible array containing private data @@ -1886,9 +1888,6 @@ enum netdev_reg_state { * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer - * @gro_flush_timeout: timeout for GRO layer in NAPI - * @napi_defer_hard_irqs: If not zero, provides a counter that would - * allow to avoid NIC hard IRQ, on busy queues. * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one @@ -1978,6 +1977,7 @@ enum netdev_reg_state { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one + * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -1990,6 +1990,14 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @see_all_hwtstamp_requests: device wants to see calls to + * ndo_hwtstamp_set() for all timestamp requests + * regardless of source, even if those aren't + * HWTSTAMP_SOURCE_NETDEV + * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN + * @netns_local: interface can't change network namespaces + * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2028,6 +2036,16 @@ enum netdev_reg_state { * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. * + * @max_pacing_offload_horizon: max EDT offload horizon in nsec. + * @napi_config: An array of napi_config structures containing per-NAPI + * settings. + * @gro_flush_timeout: timeout for GRO layer in NAPI + * @napi_defer_hard_irqs: If not zero, provides a counter that would + * allow to avoid NIC hard IRQ, on busy queues. + * + * @neighbours: List heads pointing to this device's neighbours' + * dev_list, one per address-family. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2040,7 +2058,10 @@ struct net_device { /* TX read-mostly hotpath */ __cacheline_group_begin(net_device_read_tx); - unsigned long long priv_flags; + struct_group(priv_flags_fast, + unsigned long priv_flags:32; + unsigned long lltx:1; + ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; @@ -2090,8 +2111,6 @@ struct net_device { int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; - unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; @@ -2166,7 +2185,6 @@ struct net_device { #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def *wireless_handlers; - struct iw_public_data *wireless_data; #endif const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -2225,6 +2243,9 @@ struct net_device { /* Protocol-specific pointers */ struct in_device __rcu *ip_ptr; + /** @fib_nh_head: nexthops associated with this netdev */ + struct hlist_head fib_nh_head; + #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif @@ -2369,12 +2390,19 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif + struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; + /* priv_flags_slow, ungrouped to save space */ + unsigned long see_all_hwtstamp_requests:1; + unsigned long change_proto_down:1; + unsigned long netns_local:1; + unsigned long fcoe_mtu:1; + struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) @@ -2408,6 +2436,27 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; + u64 max_pacing_offload_horizon; + struct napi_config *napi_config; + unsigned long gro_flush_timeout; + u32 napi_defer_hard_irqs; + + /** + * @lock: protects @net_shaper_hierarchy, feel free to use for other + * netdev-scope protection. Ordering: take after rtnl_lock. + */ + struct mutex lock; + +#if IS_ENABLED(CONFIG_NET_SHAPER) + /** + * @net_shaper_hierarchy: data tracking the current shaper status + * see include/net/net_shapers.h + */ + struct net_shaper_hierarchy *net_shaper_hierarchy; +#endif + + struct hlist_head neighbours[NEIGH_NR_TABLES]; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; @@ -2658,6 +2707,22 @@ netif_napi_add_tx_weight(struct net_device *dev, } /** + * netif_napi_add_config - initialize a NAPI context with persistent config + * @dev: network device + * @napi: NAPI context + * @poll: polling function + * @index: the NAPI index + */ +static inline void +netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + napi->config = &dev->napi_config[index]; + netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); +} + +/** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device * @napi: NAPI context @@ -3094,8 +3159,6 @@ void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); @@ -3336,6 +3399,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev) static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { + /* Paired with READ_ONCE() from dev_watchdog() */ + WRITE_ONCE(dev_queue->trans_start, jiffies); + + /* This barrier is paired with smp_mb() from dev_watchdog() */ + smp_mb__before_atomic(); + /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -3462,6 +3531,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, if (likely(dql_avail(&dev_queue->dql) >= 0)) return; + /* Paired with READ_ONCE() from dev_watchdog() */ + WRITE_ONCE(dev_queue->trans_start, jiffies); + + /* This barrier is paired with smp_mb() from dev_watchdog() */ + smp_mb__before_atomic(); + set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); /* @@ -3469,7 +3544,7 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, * because in netdev_tx_completed_queue we update the dql_completed * before checking the XOFF flag. */ - smp_mb(); + smp_mb__after_atomic(); /* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0)) @@ -3539,7 +3614,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, dql_completed(&dev_queue->dql, bytes); /* - * Without the memory barrier there is a small possiblity that + * Without the memory barrier there is a small possibility that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ @@ -3578,6 +3653,17 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) } /** + * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue + * @dev: network device + * @qid: stack index of the queue to reset + */ +static inline void netdev_tx_reset_subqueue(const struct net_device *dev, + u32 qid) +{ + netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid)); +} + +/** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device * @@ -3586,7 +3672,7 @@ static inline void netdev_tx_reset_queue(struct netdev_queue *q) */ static inline void netdev_reset_queue(struct net_device *dev_queue) { - netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0)); + netdev_tx_reset_subqueue(dev_queue, 0); } /** @@ -3950,8 +4036,11 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); +u32 dev_get_min_mp_channel_count(const struct net_device *dev); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); @@ -4449,7 +4538,7 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_LOCK(dev, txq, cpu) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_lock(txq, cpu); \ } else { \ __netif_tx_acquire(txq); \ @@ -4457,12 +4546,12 @@ static inline void netif_tx_unlock_bh(struct net_device *dev) } #define HARD_TX_TRYLOCK(dev, txq) \ - (((dev->features & NETIF_F_LLTX) == 0) ? \ + (!(dev)->lltx ? \ __netif_tx_trylock(txq) : \ __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ - if ((dev->features & NETIF_F_LLTX) == 0) { \ + if (!(dev)->lltx) { \ __netif_tx_unlock(txq); \ } else { \ __netif_tx_release(txq); \ @@ -4607,7 +4696,7 @@ void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); /** - * __dev_uc_sync - Synchonize device's unicast list + * __dev_uc_sync - Synchronize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed @@ -4651,7 +4740,7 @@ void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); /** - * __dev_mc_sync - Synchonize device's multicast list + * __dev_mc_sync - Synchronize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed @@ -5026,6 +5115,24 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); void netif_inherit_tso_max(struct net_device *to, const struct net_device *from); +static inline unsigned int +netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb) +{ + /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ + return skb->protocol == htons(ETH_P_IPV6) ? + READ_ONCE(dev->gro_max_size) : + READ_ONCE(dev->gro_ipv4_max_size); +} + +static inline unsigned int +netif_get_gso_max_size(const struct net_device *dev, const struct sk_buff *skb) +{ + /* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ + return skb->protocol == htons(ETH_P_IPV6) ? + READ_ONCE(dev->gso_max_size) : + READ_ONCE(dev->gso_ipv4_max_size); +} + static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2683b2b77612..2b8aac2c70ad 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -376,15 +376,11 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, struct nf_conn; enum nf_nat_manip_type; struct nlattr; -enum ip_conntrack_dir; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); - unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct, - enum nf_nat_manip_type mtype, - enum ip_conntrack_dir dir); void (*remove_nat_bysrc)(struct nf_conn *ct); }; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c47443e7a97e..5eaceef41e6c 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -38,11 +38,8 @@ static inline void folio_start_private_2(struct folio *folio) folio_set_private_2(folio); } -/* Marks used on xarray-based buffers */ -#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ -#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ - enum netfs_io_source { + NETFS_SOURCE_UNKNOWN, NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, NETFS_READ_FROM_CACHE, @@ -73,6 +70,7 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ +#define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ }; /* @@ -133,9 +131,11 @@ static inline struct netfs_group *netfs_folio_group(struct folio *folio) struct netfs_io_stream { /* Submission tracking */ struct netfs_io_subrequest *construct; /* Op being constructed */ + size_t sreq_max_len; /* Maximum size of a subrequest */ + unsigned int sreq_max_segs; /* 0 or max number of segments in an iterator */ unsigned int submit_off; /* Folio offset we're submitting from */ unsigned int submit_len; /* Amount of data left to submit */ - unsigned int submit_max_len; /* Amount I/O can be rounded up to */ + unsigned int submit_extendable_to; /* Amount I/O can be rounded up to */ void (*prepare_write)(struct netfs_io_subrequest *subreq); void (*issue_write)(struct netfs_io_subrequest *subreq); /* Collection tracking */ @@ -176,41 +176,45 @@ struct netfs_io_subrequest { struct list_head rreq_link; /* Link in rreq->subrequests */ struct iov_iter io_iter; /* Iterator for this subrequest */ unsigned long long start; /* Where to start the I/O */ - size_t max_len; /* Maximum size of the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ + size_t consumed; /* Amount of read data consumed */ + size_t prev_donated; /* Amount of data donated from previous subreq */ + size_t next_donated; /* Amount of data donated from next subreq */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ unsigned int nr_segs; /* Number of segs in io_iter */ - unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ + unsigned char curr_folioq_slot; /* Folio currently being read */ + unsigned char curr_folio_order; /* Order of folio */ + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SHORT_IO 2 /* Set if the I/O was short */ #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ +#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */ #define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ #define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ #define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ #define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ -#define NETFS_SREQ_HIT_EOF 12 /* Set if we hit the EOF */ }; enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ + NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ - NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ + NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ - NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_DIO_WRITE, /* This is a direct I/O write */ + NETFS_PGPRIV2_COPY_TO_CACHE, /* [DEPRECATED] This is writing read data to the cache */ nr__netfs_io_origin } __mode(byte); @@ -227,11 +231,14 @@ struct netfs_io_request { struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; + struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ + struct folio_queue *buffer; /* Head of I/O buffer */ + struct folio_queue *buffer_tail; /* Tail of I/O buffer */ struct iov_iter iter; /* Unencrypted-side iterator */ struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ @@ -245,24 +252,23 @@ struct netfs_io_request { unsigned int nr_group_rel; /* Number of refs to release on ->group */ spinlock_t lock; /* Lock for queuing subreqs */ atomic_t nr_outstanding; /* Number of ops in progress */ - atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */ - size_t upper_len; /* Length can be extended to here */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ - short error; /* 0 or error that occurred */ + long error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ + u8 buffer_head_slot; /* First slot in ->buffer */ + u8 buffer_tail_slot; /* Next slot in ->buffer_tail */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ - unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + size_t prev_donated; /* Fallback for subreq->prev_donated */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_INCOMPLETE_IO 0 /* Some ioreqs terminated short or with error */ #define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ @@ -274,6 +280,7 @@ struct netfs_io_request { #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ +#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; @@ -292,7 +299,7 @@ struct netfs_request_ops { /* Read request handling */ void (*expand_readahead)(struct netfs_io_request *rreq); - bool (*clamp_length)(struct netfs_io_subrequest *subreq); + int (*prepare_read)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); bool (*is_still_valid)(struct netfs_io_request *rreq); int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, @@ -422,7 +429,10 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); /* (Sub)request management API. */ -void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); +void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, + bool was_async); +void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, + int error, bool was_async); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, diff --git a/include/linux/netlink.h b/include/linux/netlink.h index b332c2048c75..c3ae84a77e16 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -34,6 +34,7 @@ struct netlink_skb_parms { #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) +#define NETLINK_CTX_SIZE 48 void netlink_table_grab(void); @@ -239,7 +240,7 @@ int netlink_register_notifier(struct notifier_block *nb); int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ -struct sock *netlink_getsockbyfilp(struct file *filp); +struct sock *netlink_getsockbyfd(int fd); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); @@ -293,7 +294,7 @@ struct netlink_callback { int flags; bool strict_check; union { - u8 ctx[48]; + u8 ctx[NETLINK_CTX_SIZE]; /* args is deprecated. Cast a struct over ctx instead * for proper type safety. @@ -302,7 +303,7 @@ struct netlink_callback { }; }; -#define NL_ASSERT_DUMP_CTX_FITS(type_name) \ +#define NL_ASSERT_CTX_FITS(type_name) \ BUILD_BUG_ON(sizeof(type_name) > \ sizeof_field(struct netlink_callback, ctx)) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index bd19c4b91e31..b34301650c47 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -32,6 +32,7 @@ struct netpoll { bool ipv6; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; + struct sk_buff_head skb_pool; }; struct netpoll_info { @@ -64,6 +65,7 @@ int netpoll_setup(struct netpoll *np); void __netpoll_cleanup(struct netpoll *np); void __netpoll_free(struct netpoll *np); void netpoll_cleanup(struct netpoll *np); +void do_netpoll_cleanup(struct netpoll *np); netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb); #ifdef CONFIG_NETPOLL @@ -71,7 +73,7 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi) { struct net_device *dev = napi->dev; - if (dev && dev->npinfo) { + if (dev && rcu_access_pointer(dev->npinfo)) { int owner = smp_processor_id(); while (cmpxchg(&napi->poll_owner, -1, owner) != -1) diff --git a/include/linux/nfs.h b/include/linux/nfs.h index ceb70a926b95..9ad727ddfedb 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -8,11 +8,20 @@ #ifndef _LINUX_NFS_H #define _LINUX_NFS_H +#include <linux/cred.h> +#include <linux/sunrpc/auth.h> #include <linux/sunrpc/msg_prot.h> #include <linux/string.h> #include <linux/crc32.h> #include <uapi/linux/nfs.h> +/* The LOCALIO program is entirely private to Linux and is + * NOT part of the uapi. + */ +#define NFS_LOCALIO_PROGRAM 400122 +#define LOCALIOPROC_NULL 0 +#define LOCALIOPROC_UUID_IS_LOCAL 1 + /* * This is the kernel NFS client file handle representation */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index f9df88091c6d..8d7430d9f218 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -281,15 +281,18 @@ enum nfsstat4 { /* nfs42 */ NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NO_AUTH = 10089, - NFS4ERR_UNION_NOTSUPP = 10090, - NFS4ERR_OFFLOAD_DENIED = 10091, - NFS4ERR_WRONG_LFS = 10092, - NFS4ERR_BADLABEL = 10093, - NFS4ERR_OFFLOAD_NO_REQS = 10094, + NFS4ERR_UNION_NOTSUPP = 10090, + NFS4ERR_OFFLOAD_DENIED = 10091, + NFS4ERR_WRONG_LFS = 10092, + NFS4ERR_BADLABEL = 10093, + NFS4ERR_OFFLOAD_NO_REQS = 10094, /* xattr (RFC8276) */ - NFS4ERR_NOXATTR = 10095, - NFS4ERR_XATTR2BIG = 10096, + NFS4ERR_NOXATTR = 10095, + NFS4ERR_XATTR2BIG = 10096, + + /* can be used for internal errors */ + NFS4ERR_FIRST_FREE }; /* error codes for internal client use */ diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h new file mode 100644 index 000000000000..5fc02df88252 --- /dev/null +++ b/include/linux/nfs_common.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains constants and methods used by both NFS client and server. + */ +#ifndef _LINUX_NFS_COMMON_H +#define _LINUX_NFS_COMMON_H + +#include <linux/errno.h> +#include <uapi/linux/nfs.h> + +/* Mapping from NFS error code to "errno" error code. */ +#define errno_NFSERR_IO EIO + +int nfs_stat_to_errno(enum nfs_stat status); +int nfs4_stat_to_errno(int stat); + +#endif /* _LINUX_NFS_COMMON_H */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1df86ab98c77..b804346a9741 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -8,6 +8,7 @@ #include <linux/wait.h> #include <linux/nfs_xdr.h> #include <linux/sunrpc/xprt.h> +#include <linux/nfslocalio.h> #include <linux/atomic.h> #include <linux/refcount.h> @@ -49,6 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ +#define NFS_CS_LOCAL_IO 10 /* - client is local */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -125,6 +127,13 @@ struct nfs_client { struct net *cl_net; struct list_head pending_cb_stateids; struct rcu_head rcu; + +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + struct timespec64 cl_nfssvc_boot; + seqlock_t cl_boot_lock; + nfs_uuid_t cl_uuid; + spinlock_t cl_localio_lock; +#endif /* CONFIG_NFS_LOCALIO */ }; /* @@ -158,6 +167,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_WAIT 0x02000000 #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 +#define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ @@ -234,12 +244,12 @@ struct nfs_server { /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; #endif - struct ida openowner_id; - struct ida lockowner_id; + atomic64_t owner_ctr; struct list_head state_owners_lru; struct list_head layouts; struct list_head delegations; struct list_head ss_copies; + struct list_head ss_src_copies; unsigned long delegation_gen; unsigned long mig_gen; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 45623af3e7b8..559273a0f16d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -446,7 +446,7 @@ struct nfs42_clone_res { struct stateowner_id { __u64 create_time; - __u32 uniquifier; + __u64 uniquifier; }; struct nfs4_open_delegation { @@ -1336,7 +1336,7 @@ struct pnfs_commit_array { struct rcu_head rcu; refcount_t refcount; unsigned int nbuckets; - struct pnfs_commit_bucket buckets[]; + struct pnfs_commit_bucket buckets[] __counted_by(nbuckets); }; struct pnfs_ds_commit_info { @@ -1854,6 +1854,24 @@ struct nfs_rpc_ops { }; /* + * Helper functions used by NFS client and/or server + */ +static inline void encode_opaque_fixed(struct xdr_stream *xdr, + const void *buf, size_t len) +{ + WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0); +} + +static inline int decode_opaque_fixed(struct xdr_stream *xdr, + void *buf, size_t len) +{ + ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len); + if (unlikely(ret < 0)) + return -EIO; + return 0; +} + +/* * Function vectors etc. for the NFS client */ extern const struct nfs_rpc_ops nfs_v2_clientops; @@ -1866,4 +1884,4 @@ extern const struct rpc_version nfs_version4; extern const struct rpc_version nfsacl_version3; extern const struct rpc_program nfsacl_program; -#endif +#endif /* _LINUX_NFS_XDR_H */ diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h new file mode 100644 index 000000000000..9202f4b24343 --- /dev/null +++ b/include/linux/nfslocalio.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + * Copyright (C) 2024 NeilBrown <neilb@suse.de> + */ +#ifndef __LINUX_NFSLOCALIO_H +#define __LINUX_NFSLOCALIO_H + +/* nfsd_file structure is purposely kept opaque to NFS client */ +struct nfsd_file; + +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/uuid.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/nfs.h> +#include <net/net_namespace.h> + +/* + * Useful to allow a client to negotiate if localio + * possible with its server. + * + * See Documentation/filesystems/nfs/localio.rst for more detail. + */ +typedef struct { + uuid_t uuid; + struct list_head list; + struct net __rcu *net; /* nfsd's network namespace */ + struct auth_domain *dom; /* auth_domain for localio */ +} nfs_uuid_t; + +void nfs_uuid_init(nfs_uuid_t *); +bool nfs_uuid_begin(nfs_uuid_t *); +void nfs_uuid_end(nfs_uuid_t *); +void nfs_uuid_is_local(const uuid_t *, struct list_head *, + struct net *, struct auth_domain *, struct module *); +void nfs_uuid_invalidate_clients(struct list_head *list); +void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); + +/* localio needs to map filehandle -> struct nfsd_file */ +extern struct nfsd_file * +nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, + const struct cred *, const struct nfs_fh *, + const fmode_t) __must_hold(rcu); + +struct nfsd_localio_operations { + bool (*nfsd_serv_try_get)(struct net *); + void (*nfsd_serv_put)(struct net *); + struct nfsd_file *(*nfsd_open_local_fh)(struct net *, + struct auth_domain *, + struct rpc_clnt *, + const struct cred *, + const struct nfs_fh *, + const fmode_t); + struct net *(*nfsd_file_put_local)(struct nfsd_file *); + struct file *(*nfsd_file_file)(struct nfsd_file *); +} ____cacheline_aligned; + +extern void nfsd_localio_ops_init(void); +extern const struct nfsd_localio_operations *nfs_to; + +struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, + struct rpc_clnt *, const struct cred *, + const struct nfs_fh *, const fmode_t); + +static inline void nfs_to_nfsd_net_put(struct net *net) +{ + /* + * Once reference to nfsd_serv is dropped, NFSD could be + * unloaded, so ensure safe return from nfsd_file_put_local() + * by always taking RCU. + */ + rcu_read_lock(); + nfs_to->nfsd_serv_put(net); + rcu_read_unlock(); +} + +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +{ + /* + * Must not hold RCU otherwise nfsd_file_put() can easily trigger: + * "Voluntary context switch within RCU read-side critical section!" + * by scheduling deep in underlying filesystem (e.g. XFS). + */ + struct net *net = nfs_to->nfsd_file_put_local(localio); + + nfs_to_nfsd_net_put(net); +} + +#else /* CONFIG_NFS_LOCALIO */ +static inline void nfsd_localio_ops_init(void) +{ +} +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +{ +} +#endif /* CONFIG_NFS_LOCALIO */ + +#endif /* __LINUX_NFSLOCALIO_H */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index b61438313a73..9fd7a0ce9c1a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -107,11 +107,11 @@ extern nodemask_t _unused_nodemask_arg_; */ #define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ __nodemask_pr_bits(maskp) -static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) +static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) { return m ? MAX_NUMNODES : 0; } -static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) +static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) { return m ? m->bits : NULL; } @@ -132,19 +132,19 @@ static __always_inline void __node_set(int node, volatile nodemask_t *dstp) } #define node_clear(node, dst) __node_clear((node), &(dst)) -static inline void __node_clear(int node, volatile nodemask_t *dstp) +static __always_inline void __node_clear(int node, volatile nodemask_t *dstp) { clear_bit(node, dstp->bits); } #define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES) -static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits) { bitmap_fill(dstp->bits, nbits); } #define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES) -static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) +static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) { bitmap_zero(dstp->bits, nbits); } @@ -154,14 +154,14 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits) #define node_test_and_set(node, nodemask) \ __node_test_and_set((node), &(nodemask)) -static inline bool __node_test_and_set(int node, nodemask_t *addr) +static __always_inline bool __node_test_and_set(int node, nodemask_t *addr) { return test_and_set_bit(node, addr->bits); } #define nodes_and(dst, src1, src2) \ __nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -169,7 +169,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_or(dst, src1, src2) \ __nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -177,7 +177,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_xor(dst, src1, src2) \ __nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -185,7 +185,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_andnot(dst, src1, src2) \ __nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES) -static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, +static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); @@ -193,7 +193,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p, #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) -static inline void __nodes_complement(nodemask_t *dstp, +static __always_inline void __nodes_complement(nodemask_t *dstp, const nodemask_t *srcp, unsigned int nbits) { bitmap_complement(dstp->bits, srcp->bits, nbits); @@ -201,7 +201,7 @@ static inline void __nodes_complement(nodemask_t *dstp, #define nodes_equal(src1, src2) \ __nodes_equal(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_equal(const nodemask_t *src1p, +static __always_inline bool __nodes_equal(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_equal(src1p->bits, src2p->bits, nbits); @@ -209,7 +209,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p, #define nodes_intersects(src1, src2) \ __nodes_intersects(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_intersects(const nodemask_t *src1p, +static __always_inline bool __nodes_intersects(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_intersects(src1p->bits, src2p->bits, nbits); @@ -217,33 +217,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p, #define nodes_subset(src1, src2) \ __nodes_subset(&(src1), &(src2), MAX_NUMNODES) -static inline bool __nodes_subset(const nodemask_t *src1p, +static __always_inline bool __nodes_subset(const nodemask_t *src1p, const nodemask_t *src2p, unsigned int nbits) { return bitmap_subset(src1p->bits, src2p->bits, nbits); } #define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES) -static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits) { return bitmap_empty(srcp->bits, nbits); } #define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES) -static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) +static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits) { return bitmap_full(srcp->bits, nbits); } #define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES) -static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) +static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits) { return bitmap_weight(srcp->bits, nbits); } #define nodes_shift_right(dst, src, n) \ __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_right(nodemask_t *dstp, +static __always_inline void __nodes_shift_right(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); @@ -251,7 +251,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp, #define nodes_shift_left(dst, src, n) \ __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) -static inline void __nodes_shift_left(nodemask_t *dstp, +static __always_inline void __nodes_shift_left(nodemask_t *dstp, const nodemask_t *srcp, int n, int nbits) { bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); @@ -261,13 +261,13 @@ static inline void __nodes_shift_left(nodemask_t *dstp, > MAX_NUMNODES, then the silly min_ts could be dropped. */ #define first_node(src) __first_node(&(src)) -static inline unsigned int __first_node(const nodemask_t *srcp) +static __always_inline unsigned int __first_node(const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES)); } #define next_node(n, src) __next_node((n), &(src)) -static inline unsigned int __next_node(int n, const nodemask_t *srcp) +static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp) { return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1)); } @@ -277,7 +277,7 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp) * the first node in src if needed. Returns MAX_NUMNODES if src is empty. */ #define next_node_in(n, src) __next_node_in((n), &(src)) -static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) +static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp) { unsigned int ret = __next_node(node, srcp); @@ -286,7 +286,7 @@ static inline unsigned int __next_node_in(int node, const nodemask_t *srcp) return ret; } -static inline void init_nodemask_of_node(nodemask_t *mask, int node) +static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node) { nodes_clear(*mask); node_set(node, *mask); @@ -304,7 +304,7 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node) }) #define first_unset_node(mask) __first_unset_node(&(mask)) -static inline unsigned int __first_unset_node(const nodemask_t *maskp) +static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp) { return min_t(unsigned int, MAX_NUMNODES, find_first_zero_bit(maskp->bits, MAX_NUMNODES)); @@ -338,21 +338,21 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp) #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse_user(const char __user *buf, int len, +static __always_inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) -static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) +static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { return bitmap_parselist(buf, dstp->bits, nbits); } #define node_remap(oldbit, old, new) \ __node_remap((oldbit), &(old), &(new), MAX_NUMNODES) -static inline int __node_remap(int oldbit, +static __always_inline int __node_remap(int oldbit, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); @@ -360,7 +360,7 @@ static inline int __node_remap(int oldbit, #define nodes_remap(dst, src, old, new) \ __nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES) -static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, +static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, const nodemask_t *oldp, const nodemask_t *newp, int nbits) { bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); @@ -368,7 +368,7 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp, #define nodes_onto(dst, orig, relmap) \ __nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES) -static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, const nodemask_t *relmapp, int nbits) { bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits); @@ -376,7 +376,7 @@ static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp, #define nodes_fold(dst, orig, sz) \ __nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES) -static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, +static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp, int sz, int nbits) { bitmap_fold(dstp->bits, origp->bits, sz, nbits); @@ -418,22 +418,22 @@ enum node_states { extern nodemask_t node_states[NR_NODE_STATES]; #if MAX_NUMNODES > 1 -static inline int node_state(int node, enum node_states state) +static __always_inline int node_state(int node, enum node_states state) { return node_isset(node, node_states[state]); } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { __node_set(node, &node_states[state]); } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { __node_clear(node, &node_states[state]); } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return nodes_weight(node_states[state]); } @@ -443,11 +443,11 @@ static inline int num_node_state(enum node_states state) #define first_online_node first_node(node_states[N_ONLINE]) #define first_memory_node first_node(node_states[N_MEMORY]) -static inline unsigned int next_online_node(int nid) +static __always_inline unsigned int next_online_node(int nid) { return next_node(nid, node_states[N_ONLINE]); } -static inline unsigned int next_memory_node(int nid) +static __always_inline unsigned int next_memory_node(int nid) { return next_node(nid, node_states[N_MEMORY]); } @@ -455,13 +455,13 @@ static inline unsigned int next_memory_node(int nid) extern unsigned int nr_node_ids; extern unsigned int nr_online_nodes; -static inline void node_set_online(int nid) +static __always_inline void node_set_online(int nid) { node_set_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); } -static inline void node_set_offline(int nid) +static __always_inline void node_set_offline(int nid) { node_clear_state(nid, N_ONLINE); nr_online_nodes = num_node_state(N_ONLINE); @@ -469,20 +469,20 @@ static inline void node_set_offline(int nid) #else -static inline int node_state(int node, enum node_states state) +static __always_inline int node_state(int node, enum node_states state) { return node == 0; } -static inline void node_set_state(int node, enum node_states state) +static __always_inline void node_set_state(int node, enum node_states state) { } -static inline void node_clear_state(int node, enum node_states state) +static __always_inline void node_clear_state(int node, enum node_states state) { } -static inline int num_node_state(enum node_states state) +static __always_inline int num_node_state(enum node_states state) { return 1; } @@ -502,7 +502,7 @@ static inline int num_node_state(enum node_states state) #endif -static inline int node_random(const nodemask_t *maskp) +static __always_inline int node_random(const nodemask_t *maskp) { #if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1) int w, bit; diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 45702bdcbceb..b42e64734968 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -237,7 +237,5 @@ static inline int notifier_to_errno(int ret) #define KBD_KEYSYM 0x0004 /* Keyboard keysym */ #define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ -extern struct blocking_notifier_head reboot_notifier_list; - #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/numa.h b/include/linux/numa.h index eb19503604fe..3567e40329eb 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -30,6 +30,12 @@ static inline bool numa_valid_node(int nid) #ifdef CONFIG_NUMA #include <asm/sparsemem.h> +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) + +void __init alloc_node_data(int nid); +void __init alloc_offline_node_data(int nid); + /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); @@ -57,6 +63,8 @@ static inline int phys_to_target_node(u64 start) { return 0; } + +static inline void alloc_offline_node_data(int nid) {} #endif #define numa_map_to_online_node(node) numa_nearest_node(node, N_ONLINE) diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h new file mode 100644 index 000000000000..cfad6ce7e1bd --- /dev/null +++ b/include/linux/numa_memblks.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NUMA_MEMBLKS_H +#define __NUMA_MEMBLKS_H + +#ifdef CONFIG_NUMA_MEMBLKS +#include <linux/types.h> + +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) + +void __init numa_set_distance(int from, int to, int distance); +void __init numa_reset_distance(void); + +struct numa_memblk { + u64 start; + u64 end; + int nid; +}; + +struct numa_meminfo { + int nr_blks; + struct numa_memblk blk[NR_NODE_MEMBLKS]; +}; + +int __init numa_add_memblk(int nodeid, u64 start, u64 end); +void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); + +int __init numa_cleanup_meminfo(struct numa_meminfo *mi); + +int __init numa_memblks_init(int (*init_func)(void), + bool memblock_force_top_down); + +#ifdef CONFIG_NUMA_EMU +int numa_emu_cmdline(char *str); +void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, + unsigned int nr_emu_nids); +u64 __init numa_emu_dma_end(void); +void __init numa_emulation(struct numa_meminfo *numa_meminfo, + int numa_dist_cnt); +#else +static inline void numa_emulation(struct numa_meminfo *numa_meminfo, + int numa_dist_cnt) +{ } +static inline int numa_emu_cmdline(char *str) +{ + return -EINVAL; +} +#endif /* CONFIG_NUMA_EMU */ + +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(u64 start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif /* CONFIG_NUMA_KEEP_MEMINFO */ + +#endif /* CONFIG_NUMA_MEMBLKS */ + +#endif /* __NUMA_MEMBLKS_H */ diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index e10333d78dbb..19d2b256180f 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -12,7 +12,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); - +struct key *nvme_tls_key_lookup(key_serial_t key_id); #else static inline key_serial_t nvme_tls_psk_default(struct key *keyring, @@ -24,5 +24,9 @@ static inline key_serial_t nvme_keyring_id(void) { return 0; } +static inline struct key *nvme_tls_key_lookup(key_serial_t key_id) +{ + return ERR_PTR(-ENOTSUPP); +} #endif /* !CONFIG_NVME_KEYRING */ #endif /* _NVME_KEYRING_H */ diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index eb2f04d636c8..97c5f00b9aa3 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -25,6 +25,7 @@ enum nvme_rdma_cm_status { NVME_RDMA_CM_NO_RSC = 0x06, NVME_RDMA_CM_INVALID_IRD = 0x07, NVME_RDMA_CM_INVALID_ORD = 0x08, + NVME_RDMA_CM_INVALID_CNTLID = 0x09, }; static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) @@ -46,6 +47,8 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status) return "invalid IRD"; case NVME_RDMA_CM_INVALID_ORD: return "Invalid ORD"; + case NVME_RDMA_CM_INVALID_CNTLID: + return "invalid controller ID"; default: return "unrecognized reason"; } @@ -64,7 +67,8 @@ struct nvme_rdma_cm_req { __le16 qid; __le16 hrqsize; __le16 hsqsize; - u8 rsvd[24]; + __le16 cntlid; + u8 rsvd[22]; }; /** diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7b2ae2e43544..13377dde4527 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -327,7 +327,8 @@ struct nvme_id_ctrl { __le32 sanicap; __le32 hmminds; __le16 hmmaxd; - __u8 rsvd338[4]; + __le16 nvmsetidmax; + __le16 endgidmax; __u8 anatt; __u8 anacap; __le32 anagrpmax; @@ -388,6 +389,11 @@ enum { NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, + NVME_CTRL_SGLS_BYTE_ALIGNED = 1, + NVME_CTRL_SGLS_DWORD_ALIGNED = 2, + NVME_CTRL_SGLS_KSDBDS = 1 << 2, + NVME_CTRL_SGLS_MSDS = 1 << 19, + NVME_CTRL_SGLS_SAOS = 1 << 20, }; struct nvme_lbaf { @@ -522,6 +528,7 @@ enum { NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_CS_NS = 0x05, NVME_ID_CNS_CS_CTRL = 0x06, + NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07, NVME_ID_CNS_NS_CS_INDEP = 0x08, NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT = 0x11, @@ -530,6 +537,7 @@ enum { NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15, NVME_ID_CNS_NS_GRANULARITY = 0x16, NVME_ID_CNS_UUID_LIST = 0x17, + NVME_ID_CNS_ENDGRP_LIST = 0x19, }; enum { @@ -560,6 +568,8 @@ enum { NVME_NS_FLBAS_LBA_SHIFT = 1, NVME_NS_FLBAS_META_EXT = 0x10, NVME_NS_NMIC_SHARED = 1 << 0, + NVME_NS_ROTATIONAL = 1 << 4, + NVME_NS_VWC_NOT_PRESENT = 1 << 5, NVME_LBAF_RP_BEST = 0, NVME_LBAF_RP_BETTER = 1, NVME_LBAF_RP_GOOD = 2, @@ -617,6 +627,40 @@ enum { NVME_NIDT_CSI = 0x04, }; +struct nvme_endurance_group_log { + __u8 egcw; + __u8 egfeat; + __u8 rsvd2; + __u8 avsp; + __u8 avspt; + __u8 pused; + __le16 did; + __u8 rsvd8[24]; + __u8 ee[16]; + __u8 dur[16]; + __u8 duw[16]; + __u8 muw[16]; + __u8 hrc[16]; + __u8 hwc[16]; + __u8 mdie[16]; + __u8 neile[16]; + __u8 tegcap[16]; + __u8 uegcap[16]; + __u8 rsvd192[320]; +}; + +struct nvme_rotational_media_log { + __le16 endgid; + __le16 numa; + __le16 nrs; + __u8 rsvd6[2]; + __le32 spinc; + __le32 fspinc; + __le32 ldc; + __le32 fldc; + __u8 rsvd24[488]; +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -987,8 +1031,8 @@ struct nvme_rw_command { __le16 control; __le32 dsmgmt; __le32 reftag; - __le16 apptag; - __le16 appmask; + __le16 lbat; + __le16 lbatm; }; enum { @@ -1057,8 +1101,8 @@ struct nvme_write_zeroes_cmd { __le16 control; __le32 dsmgmt; __le32 reftag; - __le16 apptag; - __le16 appmask; + __le16 lbat; + __le16 lbatm; }; enum nvme_zone_mgmt_action { @@ -1244,6 +1288,7 @@ enum { NVME_FEAT_WRITE_PROTECT = 0x84, NVME_FEAT_VENDOR_START = 0xC0, NVME_FEAT_VENDOR_END = 0xFF, + NVME_LOG_SUPPORTED = 0x00, NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, @@ -1254,6 +1299,8 @@ enum { NVME_LOG_TELEMETRY_CTRL = 0x08, NVME_LOG_ENDURANCE_GROUP = 0x09, NVME_LOG_ANA = 0x0c, + NVME_LOG_FEATURES = 0x12, + NVME_LOG_RMI = 0x16, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1261,6 +1308,24 @@ enum { NVME_FWACT_ACTV = (2 << 3), }; +struct nvme_supported_log { + __le32 lids[256]; +}; + +enum { + NVME_LIDS_LSUPP = 1 << 0, +}; + +struct nvme_supported_features_log { + __le32 fis[256]; +}; + +enum { + NVME_FIS_FSUPP = 1 << 0, + NVME_FIS_NSCPE = 1 << 20, + NVME_FIS_CSCPE = 1 << 21, +}; + /* NVMe Namespace Write Protect State */ enum { NVME_NS_NO_WRITE_PROTECT = 0, @@ -1281,7 +1346,8 @@ struct nvme_identify { __u8 cns; __u8 rsvd3; __le16 ctrlid; - __u8 rsvd11[3]; + __le16 cnssid; + __u8 rsvd11; __u8 csi; __u32 rsvd12[4]; }; @@ -1389,7 +1455,7 @@ struct nvme_get_log_page_command { __u8 lsp; /* upper 4 bits reserved */ __le16 numdl; __le16 numdu; - __u16 rsvd11; + __le16 lsi; union { struct { __le32 lpol; @@ -2037,4 +2103,81 @@ struct nvme_completion { #define NVME_MINOR(ver) (((ver) >> 8) & 0xff) #define NVME_TERTIARY(ver) ((ver) & 0xff) +enum { + NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00, +}; + +enum { + NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00, + NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01, + NVME_PR_LOG_RESERVATION_RELEASED = 0x02, + NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03, +}; + +enum { + NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1, + NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2, + NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3, +}; + +struct nvme_pr_log { + __le64 count; + __u8 type; + __u8 nr_pages; + __u8 rsvd1[2]; + __le32 nsid; + __u8 rsvd2[48]; +}; + +struct nvmet_pr_register_data { + __le64 crkey; + __le64 nrkey; +}; + +struct nvmet_pr_acquire_data { + __le64 crkey; + __le64 prkey; +}; + +struct nvmet_pr_release_data { + __le64 crkey; +}; + +enum nvme_pr_capabilities { + NVME_PR_SUPPORT_PTPL = 1, + NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1, + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2, + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3, + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4, + NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5, + NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6, + NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7, +}; + +enum nvme_pr_register_action { + NVME_PR_REGISTER_ACT_REG = 0, + NVME_PR_REGISTER_ACT_UNREG = 1, + NVME_PR_REGISTER_ACT_REPLACE = 1 << 1, +}; + +enum nvme_pr_acquire_action { + NVME_PR_ACQUIRE_ACT_ACQUIRE = 0, + NVME_PR_ACQUIRE_ACT_PREEMPT = 1, + NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1, +}; + +enum nvme_pr_release_action { + NVME_PR_RELEASE_ACT_RELEASE = 0, + NVME_PR_RELEASE_ACT_CLEAR = 1, +}; + +enum nvme_pr_change_ptpl { + NVME_PR_CPTPL_NO_CHANGE = 0, + NVME_PR_CPTPL_RESV = 1 << 30, + NVME_PR_CPTPL_CLEARED = 2 << 30, + NVME_PR_CPTPL_PERSIST = 3 << 30, +}; + +#define NVME_PR_IGNORE_KEY (1 << 3) + #endif /* _LINUX_NVME_H */ diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h new file mode 100644 index 000000000000..15f58e3c56c7 --- /dev/null +++ b/include/linux/oa_tc6.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * OPEN Alliance 10BASE‑T1x MAC‑PHY Serial Interface framework + * + * Link: https://opensig.org/download/document/OPEN_Alliance_10BASET1x_MAC-PHY_Serial_Interface_V1.1.pdf + * + * Author: Parthiban Veerasooran <parthiban.veerasooran@microchip.com> + */ + +#include <linux/etherdevice.h> +#include <linux/spi/spi.h> + +struct oa_tc6; + +struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev); +void oa_tc6_exit(struct oa_tc6 *tc6); +int oa_tc6_write_register(struct oa_tc6 *tc6, u32 address, u32 value); +int oa_tc6_write_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length); +int oa_tc6_read_register(struct oa_tc6 *tc6, u32 address, u32 *value); +int oa_tc6_read_registers(struct oa_tc6 *tc6, u32 address, u32 value[], + u8 length); +netdev_tx_t oa_tc6_start_xmit(struct oa_tc6 *tc6, struct sk_buff *skb); +int oa_tc6_zero_align_receive_frame_enable(struct oa_tc6 *tc6); diff --git a/include/linux/of.h b/include/linux/of.h index 85b60ac9eec5..f921786cb8ac 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -289,6 +289,9 @@ extern struct device_node *of_get_parent(const struct device_node *node); extern struct device_node *of_get_next_parent(struct device_node *node); extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_next_child_with_prefix(const struct device_node *node, + struct device_node *prev, + const char *prefix); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_reserved_child( @@ -357,7 +360,7 @@ extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread); extern struct device_node *of_cpu_device_node_get(int cpu); extern int of_cpu_node_to_id(struct device_node *np); extern struct device_node *of_get_next_cpu_node(struct device_node *prev); -extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, +extern struct device_node *of_get_cpu_state_node(const struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); @@ -395,7 +398,7 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, int size); extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); -extern int of_alias_get_id(struct device_node *np, const char *stem); +extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); bool of_machine_compatible_match(const char *const *compats); @@ -435,7 +438,7 @@ extern int of_detach_node(struct device_node *); * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ -const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, +const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu); /* * struct property *prop; @@ -444,11 +447,11 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, * of_property_for_each_string(np, "propname", prop, s) * printk("String value: %s\n", s); */ -const char *of_prop_next_string(struct property *prop, const char *cur); +const char *of_prop_next_string(const struct property *prop, const char *cur); -bool of_console_check(struct device_node *dn, char *name, int index); +bool of_console_check(const struct device_node *dn, char *name, int index); -int of_map_id(struct device_node *np, u32 id, +int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); @@ -826,13 +829,13 @@ static inline bool of_console_check(const struct device_node *dn, const char *na return false; } -static inline const __be32 *of_prop_next_u32(struct property *prop, +static inline const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { return NULL; } -static inline const char *of_prop_next_string(struct property *prop, +static inline const char *of_prop_next_string(const struct property *prop, const char *cur) { return NULL; @@ -871,7 +874,7 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag { } -static inline int of_map_id(struct device_node *np, u32 id, +static inline int of_map_id(const struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out) { @@ -899,7 +902,7 @@ static inline const void *of_device_get_match_data(const struct device *dev) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif -static inline int of_prop_val_eq(struct property *p1, struct property *p2) +static inline int of_prop_val_eq(const struct property *p1, const struct property *p2) { return p1->length == p2->length && !memcmp(p1->value, p2->value, (size_t)p1->length); @@ -1252,7 +1255,7 @@ static inline int of_property_read_string_index(const struct device_node *np, static inline bool of_property_read_bool(const struct device_node *np, const char *propname) { - struct property *prop = of_find_property(np, propname, NULL); + const struct property *prop = of_find_property(np, propname, NULL); return prop ? true : false; } @@ -1430,7 +1433,7 @@ static inline int of_property_read_s32(const struct device_node *np, err = of_phandle_iterator_next(it)) #define of_property_for_each_u32(np, propname, u) \ - for (struct {struct property *prop; const __be32 *item; } _it = \ + for (struct {const struct property *prop; const __be32 *item; } _it = \ {of_find_property(np, propname, NULL), \ of_prop_next_u32(_it.prop, NULL, &u)}; \ _it.item; \ @@ -1468,6 +1471,12 @@ static inline int of_property_read_s32(const struct device_node *np, child != NULL; \ child = of_get_next_child(parent, child)) +#define for_each_child_of_node_with_prefix(parent, child, prefix) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_child_with_prefix(parent, NULL, prefix); \ + child != NULL; \ + child = of_get_next_child_with_prefix(parent, child, prefix)) + #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) @@ -1651,6 +1660,10 @@ static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); } +int of_changeset_update_prop_string(struct of_changeset *ocs, + struct device_node *np, + const char *prop_name, const char *str); + int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np, const char *prop_name); @@ -1734,7 +1747,7 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id, struct device_node *target_base); + int *ovcs_id, const struct device_node *target_base); int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); @@ -1744,7 +1757,7 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size, - int *ovcs_id, struct device_node *target_base) + int *ovcs_id, const struct device_node *target_base) { return -ENOTSUPP; } diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 26a19daf0d09..9e034363788a 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -10,7 +10,7 @@ struct of_bus; struct of_pci_range_parser { struct device_node *node; - struct of_bus *bus; + const struct of_bus *bus; const __be32 *range; const __be32 *end; int na; @@ -83,8 +83,8 @@ extern struct of_pci_range *of_pci_range_parser_one( struct of_pci_range *range); extern int of_pci_address_to_resource(struct device_node *dev, int bar, struct resource *r); -extern int of_pci_range_to_resource(struct of_pci_range *range, - struct device_node *np, +extern int of_pci_range_to_resource(const struct of_pci_range *range, + const struct device_node *np, struct resource *res); extern int of_range_to_resource(struct device_node *np, int index, struct resource *res); diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index d69ad5bb1eb1..b8d6c0c20876 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -31,6 +31,7 @@ extern void *of_fdt_unflatten_tree(const unsigned long *blob, extern int __initdata dt_root_addr_cells; extern int __initdata dt_root_size_cells; extern void *initial_boot_params; +extern phys_addr_t initial_boot_params_pa; extern char __dtb_start[]; extern char __dtb_end[]; @@ -70,8 +71,8 @@ extern u64 dt_mem_next_cell(int s, const __be32 **cellp); /* Early flat tree scan hooks */ extern int early_init_dt_scan_root(void); -extern bool early_init_dt_scan(void *params); -extern bool early_init_dt_verify(void *params); +extern bool early_init_dt_scan(void *dt_virt, phys_addr_t dt_phys); +extern bool early_init_dt_verify(void *dt_virt, phys_addr_t dt_phys); extern void early_init_dt_scan_nodes(void); extern const char *of_flat_dt_get_machine_name(void); diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index a4bea62bfa29..a692d9d979a6 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OF_GRAPH_H #define __LINUX_OF_GRAPH_H +#include <linux/cleanup.h> #include <linux/types.h> #include <linux/errno.h> @@ -37,14 +38,43 @@ struct of_endpoint { for (child = of_graph_get_next_endpoint(parent, NULL); child != NULL; \ child = of_graph_get_next_endpoint(parent, child)) +/** + * for_each_of_graph_port - iterate over every port in a device or ports node + * @parent: parent device or ports node containing port + * @child: loop variable pointing to the current port node + * + * When breaking out of the loop, and continue to use the @child, you need to + * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it. + */ +#define for_each_of_graph_port(parent, child) \ + for (struct device_node *child __free(device_node) = of_graph_get_next_port(parent, NULL);\ + child != NULL; child = of_graph_get_next_port(parent, child)) + +/** + * for_each_of_graph_port_endpoint - iterate over every endpoint in a port node + * @parent: parent port node + * @child: loop variable pointing to the current endpoint node + * + * When breaking out of the loop, and continue to use the @child, you need to + * use return_ptr(@child) or no_free_ptr(@child) not to call __free() for it. + */ +#define for_each_of_graph_port_endpoint(parent, child) \ + for (struct device_node *child __free(device_node) = of_graph_get_next_port_endpoint(parent, NULL);\ + child != NULL; child = of_graph_get_next_port_endpoint(parent, child)) + #ifdef CONFIG_OF bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); unsigned int of_graph_get_endpoint_count(const struct device_node *np); +unsigned int of_graph_get_port_count(struct device_node *np); struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); +struct device_node *of_graph_get_next_port(const struct device_node *parent, + struct device_node *port); +struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, + struct device_node *prev); struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg); struct device_node *of_graph_get_remote_endpoint( @@ -73,6 +103,11 @@ static inline unsigned int of_graph_get_endpoint_count(const struct device_node return 0; } +static inline unsigned int of_graph_get_port_count(struct device_node *np) +{ + return 0; +} + static inline struct device_node *of_graph_get_port_by_id( struct device_node *node, u32 id) { @@ -86,6 +121,20 @@ static inline struct device_node *of_graph_get_next_endpoint( return NULL; } +static inline struct device_node *of_graph_get_next_port( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + +static inline struct device_node *of_graph_get_next_port_endpoint( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + static inline struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index d6d3eae2f145..6337ad4e5fe8 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -48,12 +48,12 @@ extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); extern struct device_node *of_irq_find_parent(struct device_node *child); extern struct irq_domain *of_msi_get_domain(struct device *dev, - struct device_node *np, + const struct device_node *np, enum irq_domain_bus_token token); extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, u32 id, u32 bus_token); -extern void of_msi_configure(struct device *dev, struct device_node *np); +extern void of_msi_configure(struct device *dev, const struct device_node *np); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else static inline void of_irq_init(const struct of_device_id *matches) diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 082841908fe7..c9e3843d2dd5 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -84,13 +84,3 @@ extern void gpmc_read_settings_dt(struct device_node *np, struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; - -#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); -#else -#define board_onenand_data NULL -static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) -{ - return 0; -} -#endif diff --git a/include/linux/once.h b/include/linux/once.h index bc714d414448..30346fcdc799 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -46,7 +46,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data.once") ___done = false; \ + static bool __section(".data..once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ @@ -64,7 +64,7 @@ void __do_once_sleepable_done(bool *done, struct static_key_true *once_key, #define DO_ONCE_SLEEPABLE(func, ...) \ ({ \ bool ___ret = false; \ - static bool __section(".data.once") ___done = false; \ + static bool __section(".data..once") ___done = false; \ static DEFINE_STATIC_KEY_TRUE(___once_key); \ if (static_branch_unlikely(&___once_key)) { \ ___ret = __do_once_sleepable_start(&___done); \ diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h index b7bce4983638..27de7bc32a06 100644 --- a/include/linux/once_lite.h +++ b/include/linux/once_lite.h @@ -12,7 +12,7 @@ #define __ONCE_LITE_IF(condition) \ ({ \ - static bool __section(".data.once") __already_done; \ + static bool __section(".data..once") __already_done; \ bool __ret_cond = !!(condition); \ bool __ret_once = false; \ \ diff --git a/include/linux/oom.h b/include/linux/oom.h index 7d0c9c48a0c5..1e0fc6931ce9 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -7,7 +7,6 @@ #include <linux/types.h> #include <linux/nodemask.h> #include <uapi/linux/oom.h> -#include <linux/sched/coredump.h> /* MMF_* */ #include <linux/mm.h> /* VM_FAULT* */ struct zonelist; diff --git a/include/linux/packing.h b/include/linux/packing.h index 8d6571feb95d..5d36dcd06f60 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -17,33 +17,13 @@ enum packing_op { UNPACK, }; -/** - * packing - Convert numbers (currently u64) between a packed and an unpacked - * format. Unpacked means laid out in memory in the CPU's native - * understanding of integers, while packed means anything else that - * requires translation. - * - * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. - * @startbit: The index (in logical notation, compensated for quirks) where - * the packed value starts within pbuf. Must be larger than, or - * equal to, endbit. - * @endbit: The index (in logical notation, compensated for quirks) where - * the packed value ends within pbuf. Must be smaller than, or equal - * to, startbit. - * @op: If PACK, then uval will be treated as const pointer and copied (packed) - * into pbuf, between startbit and endbit. - * If UNPACK, then pbuf will be treated as const pointer and the logical - * value between startbit and endbit will be copied (unpacked) to uval. - * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and - * QUIRK_MSB_ON_THE_RIGHT. - * - * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. - * If op is PACK, pbuf is modified. - * If op is UNPACK, uval is modified. - */ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, enum packing_op op, u8 quirks); +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks); + +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks); + #endif diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d79818dc065..4f5c9e979bb9 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -111,5 +111,12 @@ ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \ NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH) +#define NR_NON_PAGEFLAG_BITS (SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + \ + LAST_CPUPID_SHIFT + KASAN_TAG_WIDTH + \ + LRU_GEN_WIDTH + LRU_REFS_WIDTH) + +#define NR_UNUSED_PAGEFLAG_BITS (BITS_PER_LONG - \ + (NR_NON_PAGEFLAG_BITS + NR_PAGEFLAGS)) + #endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5769fe6e4950..cf46ac720802 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -66,8 +66,6 @@ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and * file-backed pagecache (see mm/vmscan.c). * - * PG_error is set to indicate that an I/O error occurred on this page. - * * PG_arch_1 is an architecture specific page state bit. The generic code * guarantees that this bit is cleared for a page when it first is entered into * the page cache. @@ -103,22 +101,18 @@ enum pageflags { PG_waiters, /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */ PG_active, PG_workingset, - PG_error, - PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/ + PG_owner_priv_1, /* Owner use. If pagecache, fs may use */ + PG_owner_2, /* Owner use. If pagecache, fs may use */ PG_arch_1, PG_reserved, PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ - PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_ARCH_USES_PG_UNCACHED - PG_uncached, /* Page has been mapped as uncached */ -#endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif @@ -126,14 +120,21 @@ enum pageflags { PG_young, PG_idle, #endif -#ifdef CONFIG_ARCH_USES_PG_ARCH_X +#ifdef CONFIG_ARCH_USES_PG_ARCH_2 PG_arch_2, +#endif +#ifdef CONFIG_ARCH_USES_PG_ARCH_3 PG_arch_3, #endif __NR_PAGEFLAGS, PG_readahead = PG_reclaim, + /* Anonymous memory (and shmem) */ + PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* Some filesystems */ + PG_checked = PG_owner_priv_1, + /* * Depending on the way an anonymous folio can be mapped into a page * table (e.g., single PMD/PUD/CONT of the head page vs. PTE-mapped @@ -141,13 +142,13 @@ enum pageflags { * tail pages of an anonymous folio. For now, we only expect it to be * set on tail pages for PTE-mapped THP. */ - PG_anon_exclusive = PG_mappedtodisk, + PG_anon_exclusive = PG_owner_2, - /* Filesystems */ - PG_checked = PG_owner_priv_1, - - /* SwapBacked */ - PG_swapcache = PG_owner_priv_1, /* Swap page: swp_entry_t in private */ + /* + * Set if all buffer heads in the folio are mapped. + * Filesystems which do not use BHs can use it for their own purpose. + */ + PG_mappedtodisk = PG_owner_2, /* Two page bits are conscripted by FS-Cache to maintain local caching * state. These bits are set on pages belonging to the netfs's inodes @@ -183,8 +184,9 @@ enum pageflags { */ /* At least one page in this folio has the hwpoison flag set */ - PG_has_hwpoisoned = PG_error, + PG_has_hwpoisoned = PG_active, PG_large_rmappable = PG_workingset, /* anon or file-backed */ + PG_partially_mapped = PG_reclaim, /* was identified to be partially mapped */ }; #define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1) @@ -235,7 +237,7 @@ static __always_inline int page_is_fake_head(const struct page *page) return page_fixed_fake_head(page) != page; } -static inline unsigned long _compound_head(const struct page *page) +static __always_inline unsigned long _compound_head(const struct page *page) { unsigned long head = READ_ONCE(page->compound_head); @@ -304,7 +306,7 @@ static const unsigned long *const_folio_flags(const struct folio *folio, { const struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } @@ -313,7 +315,7 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n) { struct page *page = &folio->page; - VM_BUG_ON_PGFLAGS(PageTail(page), page); + VM_BUG_ON_PGFLAGS(page->compound_head & 1, page); VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page); return &page[n].flags; } @@ -506,7 +508,6 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE) -PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE) __FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE) @@ -514,8 +515,9 @@ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) TESTCLEARFLAG(LRU, lru, PF_HEAD) -PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) - TESTCLEARFLAG(Active, active, PF_HEAD) +FOLIO_FLAG(active, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(active, FOLIO_HEAD_PAGE) PAGEFLAG(Workingset, workingset, PF_HEAD) TESTCLEARFLAG(Workingset, workingset, PF_HEAD) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ @@ -531,9 +533,9 @@ PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND) PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) __SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND) -PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) - __SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) +FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(swapbacked, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(swapbacked, FOLIO_HEAD_PAGE) /* * Private page markings that may be used by the filesystem that owns the page @@ -541,9 +543,10 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL) * - PG_private and PG_private_2 cause release_folio() and co to be invoked */ PAGEFLAG(Private, private, PF_ANY) -PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY) -PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) - TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY) +FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE) + +/* owner_2 can be set on tail pages for anon memory */ +FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE) /* * Only test-and-set exist for PG_writeback. The unconditional operators are @@ -551,13 +554,13 @@ PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY) */ TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL) TESTSCFLAG(Writeback, writeback, PF_NO_TAIL) -PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL) +FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE) /* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL) -PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) - TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND) +FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) #ifdef CONFIG_HIGHMEM /* @@ -577,34 +580,26 @@ static __always_inline bool folio_test_swapcache(const struct folio *folio) test_bit(PG_swapcache, const_folio_flags(folio, 0)); } -static __always_inline bool PageSwapCache(const struct page *page) -{ - return folio_test_swapcache(page_folio(page)); -} - -SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) -CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL) +FOLIO_SET_FLAG(swapcache, FOLIO_HEAD_PAGE) +FOLIO_CLEAR_FLAG(swapcache, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(SwapCache, swapcache) +FOLIO_FLAG_FALSE(swapcache) #endif -PAGEFLAG(Unevictable, unevictable, PF_HEAD) - __CLEARPAGEFLAG(Unevictable, unevictable, PF_HEAD) - TESTCLEARFLAG(Unevictable, unevictable, PF_HEAD) +FOLIO_FLAG(unevictable, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(unevictable, FOLIO_HEAD_PAGE) #ifdef CONFIG_MMU -PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - __CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL) - TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL) -#else -PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked) - TESTSCFLAG_FALSE(Mlocked, mlocked) -#endif - -#ifdef CONFIG_ARCH_USES_PG_UNCACHED -PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND) +FOLIO_FLAG(mlocked, FOLIO_HEAD_PAGE) + __FOLIO_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(mlocked, FOLIO_HEAD_PAGE) + FOLIO_TEST_SET_FLAG(mlocked, FOLIO_HEAD_PAGE) #else -PAGEFLAG_FALSE(Uncached, uncached) +FOLIO_FLAG_FALSE(mlocked) + __FOLIO_CLEAR_FLAG_NOOP(mlocked) + FOLIO_TEST_CLEAR_FLAG_FALSE(mlocked) + FOLIO_TEST_SET_FLAG_FALSE(mlocked) #endif #ifdef CONFIG_MEMORY_FAILURE @@ -694,6 +689,13 @@ static __always_inline bool folio_test_anon(const struct folio *folio) return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0; } +static __always_inline bool PageAnonNotKsm(const struct page *page) +{ + unsigned long flags = (unsigned long)page_folio(page)->mapping; + + return (flags & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_ANON; +} + static __always_inline bool PageAnon(const struct page *page) { return folio_test_anon(page_folio(page)); @@ -723,13 +725,8 @@ static __always_inline bool folio_test_ksm(const struct folio *folio) return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) == PAGE_MAPPING_KSM; } - -static __always_inline bool PageKsm(const struct page *page) -{ - return folio_test_ksm(page_folio(page)); -} #else -TESTPAGEFLAG_FALSE(Ksm, ksm) +FOLIO_TEST_FLAG_FALSE(ksm) #endif u64 stable_page_flags(const struct page *page); @@ -865,8 +862,18 @@ static inline void ClearPageCompound(struct page *page) ClearPageHead(page); } FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE) +FOLIO_TEST_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +/* + * PG_partially_mapped is protected by deferred_split split_queue_lock, + * so its safe to use non-atomic set/clear. + */ +__FOLIO_SET_FLAG(partially_mapped, FOLIO_SECOND_PAGE) +__FOLIO_CLEAR_FLAG(partially_mapped, FOLIO_SECOND_PAGE) #else FOLIO_FLAG_FALSE(large_rmappable) +FOLIO_TEST_FLAG_FALSE(partially_mapped) +__FOLIO_SET_FLAG_NOOP(partially_mapped) +__FOLIO_CLEAR_FLAG_NOOP(partially_mapped) #endif #define PG_head_mask ((1UL << PG_head)) @@ -927,79 +934,82 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) #endif /* - * For pages that are never mapped to userspace, - * page_type may be used. Because it is initialised to -1, we invert the - * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and - * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and - * low bits so that an underflow or overflow of _mapcount won't be - * mistaken for a page type value. + * For pages that do not use mapcount, page_type may be used. + * The low 24 bits of pagetype may be used for your own purposes, as long + * as you are careful to not affect the top 8 bits. The low bits of + * pagetype will be overwritten when you clear the page_type from the page. */ - enum pagetype { - PG_buddy = 0x40000000, - PG_offline = 0x20000000, - PG_table = 0x10000000, - PG_guard = 0x08000000, - PG_hugetlb = 0x04000000, - PG_slab = 0x02000000, - PG_zsmalloc = 0x01000000, - - PAGE_TYPE_BASE = 0x80000000, - - /* - * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and - * allow owners that set a type to reuse the lower 16 bit for their own - * purposes. - */ - PAGE_MAPCOUNT_RESERVE = ~0x0000ffff, + /* 0x00-0x7f are positive numbers, ie mapcount */ + /* Reserve 0x80-0xef for mapcount overflow. */ + PGTY_buddy = 0xf0, + PGTY_offline = 0xf1, + PGTY_table = 0xf2, + PGTY_guard = 0xf3, + PGTY_hugetlb = 0xf4, + PGTY_slab = 0xf5, + PGTY_zsmalloc = 0xf6, + PGTY_unaccepted = 0xf7, + + PGTY_mapcount_underflow = 0xff }; -#define PageType(page, flag) \ - ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -#define folio_test_type(folio, flag) \ - ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) +static inline bool page_type_has_type(int page_type) +{ + return page_type < (PGTY_mapcount_underflow << 24); +} -static inline int page_type_has_type(unsigned int page_type) +/* This takes a mapcount which is one more than page->_mapcount */ +static inline bool page_mapcount_is_type(unsigned int mapcount) { - return (int)page_type < PAGE_MAPCOUNT_RESERVE; + return page_type_has_type(mapcount - 1); } -static inline int page_has_type(const struct page *page) +static inline bool page_has_type(const struct page *page) { - return page_type_has_type(READ_ONCE(page->page_type)); + return page_mapcount_is_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ -static __always_inline bool folio_test_##fname(const struct folio *folio)\ +static __always_inline bool folio_test_##fname(const struct folio *folio) \ { \ - return folio_test_type(folio, PG_##lname); \ + return data_race(folio->page.page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __folio_set_##fname(struct folio *folio) \ { \ - VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio); \ - folio->page.page_type &= ~PG_##lname; \ + if (folio_test_##fname(folio)) \ + return; \ + VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ + folio); \ + folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __folio_clear_##fname(struct folio *folio) \ { \ + if (folio->page.page_type == UINT_MAX) \ + return; \ VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ - folio->page.page_type |= PG_##lname; \ + folio->page.page_type = UINT_MAX; \ } #define PAGE_TYPE_OPS(uname, lname, fname) \ FOLIO_TYPE_OPS(lname, fname) \ static __always_inline int Page##uname(const struct page *page) \ { \ - return PageType(page, PG_##lname); \ + return data_race(page->page_type >> 24) == PGTY_##lname; \ } \ static __always_inline void __SetPage##uname(struct page *page) \ { \ - VM_BUG_ON_PAGE(!PageType(page, 0), page); \ - page->page_type &= ~PG_##lname; \ + if (Page##uname(page)) \ + return; \ + VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ + page->page_type = (unsigned int)PGTY_##lname << 24; \ } \ static __always_inline void __ClearPage##uname(struct page *page) \ { \ + if (page->page_type == UINT_MAX) \ + return; \ VM_BUG_ON_PAGE(!Page##uname(page), page); \ - page->page_type |= PG_##lname; \ + page->page_type = UINT_MAX; \ } /* @@ -1076,6 +1086,13 @@ FOLIO_TEST_FLAG_FALSE(hugetlb) PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) +/* + * Mark pages that has to be accepted before touched for the first time. + * + * Serialized with zone lock. + */ +PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) + /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. @@ -1122,14 +1139,14 @@ static __always_inline int PageAnonExclusive(const struct page *page) static __always_inline void SetPageAnonExclusive(struct page *page) { - VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); set_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } static __always_inline void ClearPageAnonExclusive(struct page *page) { - VM_BUG_ON_PGFLAGS(!PageAnon(page) || PageKsm(page), page); + VM_BUG_ON_PGFLAGS(!PageAnonNotKsm(page), page); VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page); clear_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags); } @@ -1175,25 +1192,20 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) */ #define PAGE_FLAGS_SECOND \ (0xffUL /* order */ | 1UL << PG_has_hwpoisoned | \ - 1UL << PG_large_rmappable) + 1UL << PG_large_rmappable | 1UL << PG_partially_mapped) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) /** - * page_has_private - Determine if page has private stuff - * @page: The page to be checked + * folio_has_private - Determine if folio has private stuff + * @folio: The folio to be checked * - * Determine if a page has private stuff, indicating that release routines + * Determine if a folio has private stuff, indicating that release routines * should be invoked upon it. */ -static inline int page_has_private(const struct page *page) -{ - return !!(page->flags & PAGE_FLAGS_PRIVATE); -} - -static inline bool folio_has_private(const struct folio *folio) +static inline int folio_has_private(const struct folio *folio) { - return page_has_private(&folio->page); + return !!(folio->flags & PAGE_FLAGS_PRIVATE); } #undef PF_ANY diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index c16db0067090..73dc2c1841ec 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -3,10 +3,6 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION -static inline bool has_isolate_pageblock(struct zone *zone) -{ - return zone->nr_isolate_pageblock; -} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -16,10 +12,6 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else -static inline bool has_isolate_pageblock(struct zone *zone) -{ - return false; -} static inline bool is_migrate_isolate_page(struct page *page) { return false; diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 904c52f97284..79dbd8bc35a7 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -26,11 +26,14 @@ struct page_counter { atomic_long_t children_low_usage; unsigned long watermark; + /* Latest cg2 reset watermark */ + unsigned long local_watermark; unsigned long failcnt; /* Keep all the read most fields in a separete cacheline. */ CACHELINE_PADDING(_pad2_); + bool protection_support; unsigned long min; unsigned long low; unsigned long high; @@ -44,12 +47,17 @@ struct page_counter { #define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE) #endif +/* + * Protection is supported only for the first counter (with id 0). + */ static inline void page_counter_init(struct page_counter *counter, - struct page_counter *parent) + struct page_counter *parent, + bool protection_support) { - atomic_long_set(&counter->usage, 0); + counter->usage = (atomic_long_t)ATOMIC_LONG_INIT(0); counter->max = PAGE_COUNTER_MAX; counter->parent = parent; + counter->protection_support = protection_support; } static inline unsigned long page_counter_read(struct page_counter *counter) @@ -78,11 +86,24 @@ int page_counter_memparse(const char *buf, const char *max, static inline void page_counter_reset_watermark(struct page_counter *counter) { - counter->watermark = page_counter_read(counter); + unsigned long usage = page_counter_read(counter); + + /* + * Update local_watermark first, so it's always <= watermark + * (modulo CPU/compiler re-ordering) + */ + counter->local_watermark = usage; + counter->watermark = usage; } +#ifdef CONFIG_MEMCG void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); +#else +static inline void page_counter_calculate_protection(struct page_counter *root, + struct page_counter *counter, + bool recursive_protection) {} +#endif #endif /* _LINUX_PAGE_COUNTER_H */ diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h new file mode 100644 index 000000000000..41a91df82631 --- /dev/null +++ b/include/linux/page_frag_cache.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_PAGE_FRAG_CACHE_H +#define _LINUX_PAGE_FRAG_CACHE_H + +#include <linux/bits.h> +#include <linux/log2.h> +#include <linux/mm_types_task.h> +#include <linux/types.h> + +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) +/* Use a full byte here to enable assembler optimization as the shift + * operation is usually expecting a byte. + */ +#define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0) +#else +/* Compiler should be able to figure out we don't read things as any value + * ANDed with 0 is 0. + */ +#define PAGE_FRAG_CACHE_ORDER_MASK 0 +#endif + +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT (PAGE_FRAG_CACHE_ORDER_MASK + 1) + +static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page) +{ + return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT); +} + +static inline void page_frag_cache_init(struct page_frag_cache *nc) +{ + nc->encoded_page = 0; +} + +static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc) +{ + return encoded_page_decode_pfmemalloc(nc->encoded_page); +} + +void page_frag_cache_drain(struct page_frag_cache *nc); +void __page_frag_cache_drain(struct page *page, unsigned int count); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} + +static inline void *page_frag_alloc(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); +} + +void page_frag_free(void *addr); + +#endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d9c7edb6422b..bcf0865a38ae 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -32,6 +32,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); +int filemap_invalidate_pages(struct address_space *mapping, + loff_t pos, loff_t end, bool nowait); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); @@ -204,14 +206,21 @@ enum mapping_flags { AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, - AS_LARGE_FOLIO_SUPPORT = 6, - AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ - AS_STABLE_WRITES, /* must wait for writeback before modifying + AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES = 7, /* must wait for writeback before modifying folio contents */ - AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping, - including to move the mapping */ + AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ + /* Bits 16-25 are used for FOLIO_ORDER */ + AS_FOLIO_ORDER_BITS = 5, + AS_FOLIO_ORDER_MIN = 16, + AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, }; +#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1) +#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN) +#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX) +#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK) + /** * mapping_set_error - record a writeback error in the address_space * @mapping: the mapping in which an error should be set @@ -367,9 +376,64 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) +/* + * mapping_max_folio_size_supported() - Check the max folio size supported + * + * The filesystem should call this function at mount time if there is a + * requirement on the folio mapping size in the page cache. + */ +static inline size_t mapping_max_folio_size_supported(void) +{ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER); + return PAGE_SIZE; +} + +/* + * mapping_set_folio_order_range() - Set the orders supported by a file. + * @mapping: The address space of the file. + * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive). + * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive). + * + * The filesystem should call this function in its inode constructor to + * indicate which base size (min) and maximum size (max) of folio the VFS + * can use to cache the contents of the file. This should only be used + * if the filesystem needs special handling of folio sizes (ie there is + * something the core cannot know). + * Do not tune it based on, eg, i_size. + * + * Context: This should not be called while the inode is active as it + * is non-atomic. + */ +static inline void mapping_set_folio_order_range(struct address_space *mapping, + unsigned int min, + unsigned int max) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return; + + if (min > MAX_PAGECACHE_ORDER) + min = MAX_PAGECACHE_ORDER; + + if (max > MAX_PAGECACHE_ORDER) + max = MAX_PAGECACHE_ORDER; + + if (max < min) + max = min; + + mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) | + (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX); +} + +static inline void mapping_set_folio_min_order(struct address_space *mapping, + unsigned int min) +{ + mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER); +} + /** * mapping_set_large_folios() - Indicate the file supports large folios. - * @mapping: The file. + * @mapping: The address space of the file. * * The filesystem should call this function in its inode constructor to * indicate that the VFS can use large folios to cache the contents of @@ -380,7 +444,44 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) */ static inline void mapping_set_large_folios(struct address_space *mapping) { - __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER); +} + +static inline unsigned int +mapping_max_folio_order(const struct address_space *mapping) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 0; + return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX; +} + +static inline unsigned int +mapping_min_folio_order(const struct address_space *mapping) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return 0; + return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; +} + +static inline unsigned long +mapping_min_folio_nrpages(struct address_space *mapping) +{ + return 1UL << mapping_min_folio_order(mapping); +} + +/** + * mapping_align_index() - Align index for this mapping. + * @mapping: The address_space. + * @index: The page index. + * + * The index of a folio must be naturally aligned. If you are adding a + * new folio to the page cache and need to know what index to give it, + * call this function. + */ +static inline pgoff_t mapping_align_index(struct address_space *mapping, + pgoff_t index) +{ + return round_down(index, mapping_min_folio_nrpages(mapping)); } /* @@ -389,20 +490,17 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { - /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, "Anonymous mapping always supports large folio"); - return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); + return mapping_max_folio_order(mapping) > 0; } /* Return the maximum folio size for this pagecache mapping, in bytes. */ -static inline size_t mapping_max_folio_size(struct address_space *mapping) +static inline size_t mapping_max_folio_size(const struct address_space *mapping) { - if (mapping_large_folio_support(mapping)) - return PAGE_SIZE << MAX_PAGECACHE_ORDER; - return PAGE_SIZE; + return PAGE_SIZE << mapping_max_folio_order(mapping); } static inline int filemap_nr_thps(struct address_space *mapping) @@ -913,22 +1011,25 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping, return read_cache_folio(mapping, index, NULL, file); } -/* - * Get the offset in PAGE_SIZE (even for hugetlb pages). +/** + * page_pgoff - Calculate the logical page offset of this page. + * @folio: The folio containing this page. + * @page: The page which we need the offset of. + * + * For file pages, this is the offset from the beginning of the file + * in units of PAGE_SIZE. For anonymous pages, this is the offset from + * the beginning of the anon_vma in units of PAGE_SIZE. This will + * return nonsense for KSM pages. + * + * Context: Caller must have a reference on the folio or otherwise + * prevent it from being split or freed. + * + * Return: The offset in units of PAGE_SIZE. */ -static inline pgoff_t page_to_pgoff(struct page *page) +static inline pgoff_t page_pgoff(const struct folio *folio, + const struct page *page) { - struct page *head; - - if (likely(!PageTransTail(page))) - return page->index; - - head = compound_head(page); - /* - * We don't initialize ->index for tail pages: calculate based on - * head page - */ - return head->index + page - head; + return folio->index + folio_page_idx(folio, page); } /* diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 27cd1e59ccf7..9700a29f8afb 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -25,12 +25,15 @@ enum page_walk_lock { * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to * split_huge_page() instead of handling it explicitly. - * @pte_entry: if set, called for each PTE (lowest-level) entry, - * including empty ones + * @pte_entry: if set, called for each PTE (lowest-level) entry + * including empty ones, except if @install_pte is set. + * If @install_pte is set, @pte_entry is called only for + * existing PTEs. * @pte_hole: if set, called for each hole at all levels, * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. * Any folded depths (where PTRS_PER_P?D is equal to 1) - * are skipped. + * are skipped. If @install_pte is specified, this will + * not trigger for any populated ranges. * @hugetlb_entry: if set, called for each hugetlb entry. This hook * function is called with the vma lock held, in order to * protect against a concurrent freeing of the pte_t* or @@ -51,6 +54,13 @@ enum page_walk_lock { * @pre_vma: if set, called before starting walk on a non-null vma. * @post_vma: if set, called after a walk on a non-null vma, provided * that @pre_vma and the vma walk succeeded. + * @install_pte: if set, missing page table entries are installed and + * thus all levels are always walked in the specified + * range. This callback is then invoked at the PTE level + * (having split any THP pages prior), providing the PTE to + * install. If allocations fail, the walk is aborted. This + * operation is only available for userland memory. Not + * usable for hugetlb ranges. * * p?d_entry callbacks are called even if those levels are folded on a * particular architecture/configuration. @@ -76,6 +86,8 @@ struct mm_walk_ops { int (*pre_vma)(unsigned long start, unsigned long end, struct mm_walk *walk); void (*post_vma)(struct mm_walk *walk); + int (*install_pte)(unsigned long addr, unsigned long next, + pte_t *ptep, struct mm_walk *walk); enum page_walk_lock walk_lock; }; @@ -130,4 +142,62 @@ int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, pgoff_t nr, const struct mm_walk_ops *ops, void *private); +typedef int __bitwise folio_walk_flags_t; + +/* + * Walk migration entries as well. Careful: a large folio might get split + * concurrently. + */ +#define FW_MIGRATION ((__force folio_walk_flags_t)BIT(0)) + +/* Walk shared zeropages (small + huge) as well. */ +#define FW_ZEROPAGE ((__force folio_walk_flags_t)BIT(1)) + +enum folio_walk_level { + FW_LEVEL_PTE, + FW_LEVEL_PMD, + FW_LEVEL_PUD, +}; + +/** + * struct folio_walk - folio_walk_start() / folio_walk_end() data + * @page: exact folio page referenced (if applicable) + * @level: page table level identifying the entry type + * @pte: pointer to the page table entry (FW_LEVEL_PTE). + * @pmd: pointer to the page table entry (FW_LEVEL_PMD). + * @pud: pointer to the page table entry (FW_LEVEL_PUD). + * @ptl: pointer to the page table lock. + * + * (see folio_walk_start() documentation for more details) + */ +struct folio_walk { + /* public */ + struct page *page; + enum folio_walk_level level; + union { + pte_t *ptep; + pud_t *pudp; + pmd_t *pmdp; + }; + union { + pte_t pte; + pud_t pud; + pmd_t pmd; + }; + /* private */ + struct vm_area_struct *vma; + spinlock_t *ptl; +}; + +struct folio *folio_walk_start(struct folio_walk *fw, + struct vm_area_struct *vma, unsigned long addr, + folio_walk_flags_t flags); + +#define folio_walk_end(__fw, __vma) do { \ + spin_unlock((__fw)->ptl); \ + if (likely((__fw)->level == FW_LEVEL_PTE)) \ + pte_unmap((__fw)->ptep); \ + vma_pgtable_walk_end(__vma); \ +} while (0) + #endif /* _LINUX_PAGEWALK_H */ diff --git a/include/linux/path.h b/include/linux/path.h index ca073e70decd..7ea389dc764b 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -18,12 +18,6 @@ static inline int path_equal(const struct path *path1, const struct path *path2) return path1->mnt == path2->mnt && path1->dentry == path2->dentry; } -static inline void path_put_init(struct path *path) -{ - path_put(path); - *path = (struct path) { }; -} - /* * Cleanup macro for use with __free(path_put). Avoids dereference and * copying @path unlike DEFINE_FREE(). path_put() will handle the empty diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index df54cd5b15db..0e8b74e63767 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -8,6 +8,7 @@ /* Address Translation Service */ bool pci_ats_supported(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); +int pci_prepare_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); int pci_ats_page_aligned(struct pci_dev *dev); @@ -16,6 +17,8 @@ static inline bool pci_ats_supported(struct pci_dev *d) { return false; } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } +static inline int pci_prepare_ats(struct pci_dev *dev, int ps) +{ return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h new file mode 100644 index 000000000000..cee07127455b --- /dev/null +++ b/include/linux/pci-bwctrl.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PCIe bandwidth controller + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#ifndef LINUX_PCI_BWCTRL_H +#define LINUX_PCI_BWCTRL_H + +#include <linux/pci.h> + +struct thermal_cooling_device; + +#ifdef CONFIG_PCIE_THERMAL +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port); +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev); +#else +static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + return NULL; +} +static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ +} +#endif + +#endif diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 85bdf2adb760..e818e3fdcded 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -33,10 +33,42 @@ pci_epc_interface_string(enum pci_epc_interface_type type) } /** + * struct pci_epc_map - information about EPC memory for mapping a RC PCI + * address range + * @pci_addr: start address of the RC PCI address range to map + * @pci_size: size of the RC PCI address range mapped from @pci_addr + * @map_pci_addr: RC PCI address used as the first address mapped (may be lower + * than @pci_addr) + * @map_size: size of the controller memory needed for mapping the RC PCI address + * range @map_pci_addr..@pci_addr+@pci_size + * @phys_base: base physical address of the allocated EPC memory for mapping the + * RC PCI address range + * @phys_addr: physical address at which @pci_addr is mapped + * @virt_base: base virtual address of the allocated EPC memory for mapping the + * RC PCI address range + * @virt_addr: virtual address at which @pci_addr is mapped + */ +struct pci_epc_map { + u64 pci_addr; + size_t pci_size; + + u64 map_pci_addr; + size_t map_size; + + phys_addr_t phys_base; + phys_addr_t phys_addr; + void __iomem *virt_base; + void __iomem *virt_addr; +}; + +/** * struct pci_epc_ops - set of function pointers for performing EPC operations * @write_header: ops to populate configuration space header * @set_bar: ops to configure the BAR * @clear_bar: ops to reset the BAR + * @align_addr: operation to get the mapping address, mapping size and offset + * into a controller memory window needed to map an RC PCI address + * region * @map_addr: ops to map CPU address to PCI address * @unmap_addr: ops to unmap CPU address and PCI address * @set_msi: ops to set the requested number of MSI interrupts in the MSI @@ -61,6 +93,8 @@ struct pci_epc_ops { struct pci_epf_bar *epf_bar); void (*clear_bar)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); + u64 (*align_addr)(struct pci_epc *epc, u64 pci_addr, size_t *size, + size_t *offset); int (*map_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr, u64 pci_addr, size_t size); void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, @@ -128,6 +162,7 @@ struct pci_epc_mem { * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops * @function_num_map: bitmap to manage physical function number + * @domain_nr: PCI domain number of the endpoint controller * @init_complete: flag to indicate whether the EPC initialization is complete * or not */ @@ -145,10 +180,12 @@ struct pci_epc { /* mutex to protect against concurrent access of EP controller */ struct mutex lock; unsigned long function_num_map; + int domain_nr; bool init_complete; }; /** + * enum pci_epc_bar_type - configurability of endpoint BAR * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. * @BAR_FIXED: The BAR mask is fixed by the hardware. * @BAR_RESERVED: The BAR should not be touched by an EPF driver. @@ -275,6 +312,10 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size); +int pci_epc_mem_map(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + u64 pci_addr, size_t pci_size, struct pci_epc_map *map); +void pci_epc_mem_unmap(struct pci_epc *epc, u8 func_no, u8 vfunc_no, + struct pci_epc_map *map); #else static inline void pci_epc_init_notify(struct pci_epc *epc) diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctrl.h index 45e9cfe740e4..7d439b0675e9 100644 --- a/include/linux/pci-pwrctl.h +++ b/include/linux/pci-pwrctrl.h @@ -3,10 +3,11 @@ * Copyright (C) 2024 Linaro Ltd. */ -#ifndef __PCI_PWRCTL_H__ -#define __PCI_PWRCTL_H__ +#ifndef __PCI_PWRCTRL_H__ +#define __PCI_PWRCTRL_H__ #include <linux/notifier.h> +#include <linux/workqueue.h> struct device; struct device_link; @@ -28,24 +29,26 @@ struct device_link; */ /** - * struct pci_pwrctl - PCI device power control context. + * struct pci_pwrctrl - PCI device power control context. * @dev: Address of the power controlling device. * * An object of this type must be allocated by the PCI power control device and - * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * passed to the pwrctrl subsystem to trigger a bus rescan and setup a device * link with the device once it's up. */ -struct pci_pwrctl { +struct pci_pwrctrl { struct device *dev; /* Private: don't use. */ struct notifier_block nb; struct device_link *link; + struct work_struct work; }; -int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); -void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); -int devm_pci_pwrctl_device_set_ready(struct device *dev, - struct pci_pwrctl *pwrctl); +void pci_pwrctrl_init(struct pci_pwrctrl *pwrctrl, struct device *dev); +int pci_pwrctrl_device_set_ready(struct pci_pwrctrl *pwrctrl); +void pci_pwrctrl_device_unset_ready(struct pci_pwrctrl *pwrctrl); +int devm_pci_pwrctrl_device_set_ready(struct device *dev, + struct pci_pwrctrl *pwrctrl); -#endif /* __PCI_PWRCTL_H__ */ +#endif /* __PCI_PWRCTRL_H__ */ diff --git a/include/linux/pci-tph.h b/include/linux/pci-tph.h new file mode 100644 index 000000000000..c3e806c13d64 --- /dev/null +++ b/include/linux/pci-tph.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TPH (TLP Processing Hints) + * + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * Eric Van Tassell <Eric.VanTassell@amd.com> + * Wei Huang <wei.huang2@amd.com> + */ +#ifndef LINUX_PCI_TPH_H +#define LINUX_PCI_TPH_H + +/* + * According to the ECN for PCI Firmware Spec, Steering Tag can be different + * depending on the memory type: Volatile Memory or Persistent Memory. When a + * caller query about a target's Steering Tag, it must provide the target's + * tph_mem_type. ECN link: https://members.pcisig.com/wg/PCI-SIG/document/15470. + */ +enum tph_mem_type { + TPH_MEM_TYPE_VM, /* volatile memory */ + TPH_MEM_TYPE_PM /* persistent memory */ +}; + +#ifdef CONFIG_PCIE_TPH +int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag); +int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag); +void pcie_disable_tph(struct pci_dev *pdev); +int pcie_enable_tph(struct pci_dev *pdev, int mode); +#else +static inline int pcie_tph_set_st_entry(struct pci_dev *pdev, + unsigned int index, u16 tag) +{ return -EINVAL; } +static inline int pcie_tph_get_cpu_st(struct pci_dev *dev, + enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *tag) +{ return -EINVAL; } +static inline void pcie_disable_tph(struct pci_dev *pdev) { } +static inline int pcie_enable_tph(struct pci_dev *pdev, int mode) +{ return -EINVAL; } +#endif + +#endif /* LINUX_PCI_TPH_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cf89a4b4cbc..db9b47ce3eef 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -313,12 +313,20 @@ struct pci_vpd { }; struct irq_affinity; +struct pcie_bwctrl_data; struct pcie_link_state; struct pci_sriov; struct pci_p2pdma; struct rcec_ea; -/* The pci_dev structure describes PCI devices */ +/* struct pci_dev - describes a PCI device + * + * @supported_speeds: PCIe Supported Link Speeds Vector (+ reserved 0 at + * LSB). 0 when the supported speeds cannot be + * determined (e.g., for Root Complex Integrated + * Endpoints without the relevant Capability + * Registers). + */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ struct pci_bus *bus; /* Bus this device is on */ @@ -371,6 +379,7 @@ struct pci_dev { can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ unsigned int pinned:1; /* Whether this dev is pinned */ + unsigned int config_rrs_sv:1; /* Config RRS software visibility */ unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ @@ -433,6 +442,7 @@ struct pci_dev { unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ + unsigned int tph_enabled:1; /* TLP Processing Hints */ unsigned int is_managed:1; /* Managed via devres */ unsigned int is_msi_managed:1; /* MSI release via devres installed */ unsigned int needs_freset:1; /* Requires fundamental reset */ @@ -494,6 +504,7 @@ struct pci_dev { unsigned int dpc_rp_extensions:1; u8 dpc_rp_log_size; #endif + struct pcie_bwctrl_data *link_bwctrl; #ifdef CONFIG_PCI_ATS union { struct pci_sriov *sriov; /* PF: SR-IOV info */ @@ -517,7 +528,11 @@ struct pci_dev { #ifdef CONFIG_PCI_DOE struct xarray doe_mbs; /* Data Object Exchange mailboxes */ #endif +#ifdef CONFIG_PCI_NPEM + struct npem *npem; /* Native PCIe Enclosure Management */ +#endif u16 acs_cap; /* ACS Capability offset */ + u8 supported_speeds; /* Supported Link Speeds Vector */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ /* @@ -530,6 +545,12 @@ struct pci_dev { /* These methods index pci_reset_fn_methods[] */ u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */ + +#ifdef CONFIG_PCIE_TPH + u16 tph_cap; /* TPH capability offset */ + u8 tph_mode; /* TPH mode */ + u8 tph_req_type; /* TPH requester type */ +#endif }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) @@ -622,27 +643,6 @@ void pci_set_host_bridge_release(struct pci_host_bridge *bridge, int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge); -/* - * The first PCI_BRIDGE_RESOURCE_NUM PCI bus resources (those that correspond - * to P2P or CardBus bridge windows) go in a table. Additional ones (for - * buses below host bridges or subtractive decode bridges) go in the list. - * Use pci_bus_for_each_resource() to iterate through all the resources. - */ - -/* - * PCI_SUBTRACTIVE_DECODE means the bridge forwards the window implicitly - * and there's no way to program the bridge with the details of the window. - * This does not apply to ACPI _CRS windows, even with the _DEC subtractive- - * decode bit set, because they are explicit and can be programmed with _SRS. - */ -#define PCI_SUBTRACTIVE_DECODE 0x1 - -struct pci_bus_resource { - struct list_head list; - struct resource *res; - unsigned int flags; -}; - #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ struct pci_bus { @@ -1098,7 +1098,7 @@ enum pcie_bus_config_types { extern enum pcie_bus_config_types pcie_bus_config; -extern struct bus_type pci_bus_type; +extern const struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. */ @@ -1270,6 +1270,7 @@ static inline int pcie_capability_clear_and_set_word(struct pci_dev *dev, { switch (pos) { case PCI_EXP_LNKCTL: + case PCI_EXP_LNKCTL2: case PCI_EXP_RTCTL: return pcie_capability_clear_and_set_word_locked(dev, pos, clear, set); @@ -1494,8 +1495,7 @@ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, resource_size_t offset); void pci_free_resource_list(struct list_head *resources); -void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, - unsigned int flags); +void pci_bus_add_resource(struct pci_bus *bus, struct resource *res); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); @@ -1552,7 +1552,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, void *alignf_data); -int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr, +int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size); unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); @@ -1619,8 +1619,6 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); -void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), - void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus); @@ -1782,9 +1780,19 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d, #ifdef CONFIG_PCIEPORTBUS extern bool pcie_ports_disabled; extern bool pcie_ports_native; + +int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, + bool use_lt); #else #define pcie_ports_disabled true #define pcie_ports_native false + +static inline int pcie_set_target_speed(struct pci_dev *port, + enum pci_bus_speed speed_req, + bool use_lt) +{ + return -EOPNOTSUPP; +} #endif #define PCIE_LINK_STATE_L0S (BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */ @@ -1884,7 +1892,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) { return 0; } #endif int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); -void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent); +void pci_bus_release_domain_nr(struct device *parent, int domain_nr); #endif /* Some architectures require additional setup to direct VGA traffic */ @@ -2015,7 +2023,7 @@ static inline int pci_request_regions(struct pci_dev *dev, const char *res_name) { return -EIO; } static inline void pci_release_regions(struct pci_dev *dev) { } -static inline int pci_register_io_range(struct fwnode_handle *fwnode, +static inline int pci_register_io_range(const struct fwnode_handle *fwnode, phys_addr_t addr, resource_size_t size) { return -EINVAL; } @@ -2289,12 +2297,15 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev) { } #endif +int pcim_request_all_regions(struct pci_dev *pdev, const char *name); void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen); +void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, + const char *name); +void pcim_iounmap_region(struct pci_dev *pdev, int bar); void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); +int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, - const char *name); void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); @@ -2602,6 +2613,12 @@ pci_host_bridge_acpi_msi_domain(struct pci_bus *bus) { return NULL; } static inline bool pci_pr3_present(struct pci_dev *pdev) { return false; } #endif +#if defined(CONFIG_X86) && defined(CONFIG_ACPI) +bool arch_pci_dev_is_removable(struct pci_dev *pdev); +#else +static inline bool arch_pci_dev_is_removable(struct pci_dev *pdev) { return false; } +#endif + #ifdef CONFIG_EEH static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index e388c8b1cbc2..d2402bf4aea2 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -121,6 +121,7 @@ #define PCI_CLASS_SERIAL_USB_OHCI 0x0c0310 #define PCI_CLASS_SERIAL_USB_EHCI 0x0c0320 #define PCI_CLASS_SERIAL_USB_XHCI 0x0c0330 +#define PCI_CLASS_SERIAL_USB_CDNS 0x0c0380 #define PCI_CLASS_SERIAL_USB_DEVICE 0x0c03fe #define PCI_CLASS_SERIAL_FIBER 0x0c04 #define PCI_CLASS_SERIAL_SMBUS 0x0c05 @@ -580,6 +581,7 @@ #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb +#define PCI_DEVICE_ID_AMD_1AH_M60H_DF_F3 0x124b #define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb #define PCI_DEVICE_ID_AMD_MI200_DF_F3 0x14d3 #define PCI_DEVICE_ID_AMD_MI300_DF_F3 0x152b @@ -2420,6 +2422,9 @@ #define PCI_VENDOR_ID_QCOM 0x17cb #define PCI_VENDOR_ID_CDNS 0x17cd +#define PCI_DEVICE_ID_CDNS_USBSS 0x0100 +#define PCI_DEVICE_ID_CDNS_USB 0x0120 +#define PCI_DEVICE_ID_CDNS_USBSSP 0x0200 #define PCI_VENDOR_ID_ARECA 0x17d3 #define PCI_DEVICE_ID_ARECA_1110 0x1110 @@ -2661,6 +2666,8 @@ #define PCI_DEVICE_ID_DCI_PCCOM8 0x0002 #define PCI_DEVICE_ID_DCI_PCCOM2 0x0004 +#define PCI_VENDOR_ID_GLENFLY 0x6766 + #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 #define PCI_DEVICE_ID_INTEL_HDA_CML_LP 0x02c8 diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index b4a4eb6c8866..b5b5d17998b8 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -21,8 +21,6 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 -struct dw_xpcs_desc; - enum dw_xpcs_pcs_id { DW_XPCS_ID_NATIVE = 0, NXP_SJA1105_XPCS_ID = 0x00000010, @@ -48,33 +46,18 @@ struct dw_xpcs_info { u32 pma; }; -enum dw_xpcs_clock { - DW_XPCS_CORE_CLK, - DW_XPCS_PAD_CLK, - DW_XPCS_NUM_CLKS, -}; - -struct dw_xpcs { - struct dw_xpcs_info info; - const struct dw_xpcs_desc *desc; - struct mdio_device *mdiodev; - struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; - struct phylink_pcs pcs; - phy_interface_t interface; -}; +struct dw_xpcs; +struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -void xpcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, - phy_interface_t interface, int speed, int duplex); -int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, - const unsigned long *advertising, unsigned int neg_mode); void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); -struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, - phy_interface_t interface); -struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, - phy_interface_t interface); +struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode); void xpcs_destroy(struct dw_xpcs *xpcs); +struct phylink_pcs *xpcs_create_pcs_mdiodev(struct mii_bus *bus, int addr); +void xpcs_destroy_pcs(struct phylink_pcs *pcs); + #endif /* __LINUX_PCS_XPCS_H */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8efce7414fad..35842d1e3879 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -220,15 +220,20 @@ do { \ (void)__vpp_verify; \ } while (0) +#define PERCPU_PTR(__p) \ +({ \ + unsigned long __pcpu_ptr = (__force unsigned long)(__p); \ + (typeof(*(__p)) __force __kernel *)(__pcpu_ptr); \ +}) + #ifdef CONFIG_SMP /* - * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() - * to prevent the compiler from making incorrect assumptions about the - * pointer value. The weird cast keeps both GCC and sparse happy. + * Add an offset to a pointer. Use RELOC_HIDE() to prevent the compiler + * from making incorrect assumptions about the pointer value. */ #define SHIFT_PERCPU_PTR(__p, __offset) \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) + RELOC_HIDE(PERCPU_PTR(__p), (__offset)) #define per_cpu_ptr(ptr, cpu) \ ({ \ @@ -254,13 +259,13 @@ do { \ #else /* CONFIG_SMP */ -#define VERIFY_PERCPU_PTR(__p) \ +#define per_cpu_ptr(ptr, cpu) \ ({ \ - __verify_pcpu_ptr(__p); \ - (typeof(*(__p)) __kernel __force *)(__p); \ + (void)(cpu); \ + __verify_pcpu_ptr(ptr); \ + PERCPU_PTR(ptr); \ }) -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); }) #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 36b942b67b7d..c012df33a9f0 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -145,7 +145,7 @@ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, - bool read, unsigned long ip) + unsigned long ip) { lock_release(&sem->dep_map, ip); } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4b2047b78b67..52b5ea663b9f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -41,7 +41,11 @@ PCPU_MIN_ALLOC_SHIFT) #ifdef CONFIG_RANDOM_KMALLOC_CACHES -#define PERCPU_DYNAMIC_SIZE_SHIFT 12 +# if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PAGE_SIZE_4KB) +# define PERCPU_DYNAMIC_SIZE_SHIFT 13 +# else +# define PERCPU_DYNAMIC_SIZE_SHIFT 12 +#endif /* LOCKDEP and PAGE_SIZE > 4KiB */ #else #define PERCPU_DYNAMIC_SIZE_SHIFT 10 #endif @@ -135,7 +139,6 @@ extern void __init setup_per_cpu_areas(void); extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, gfp_t gfp) __alloc_size(1); -extern size_t pcpu_alloc_size(void __percpu *__pdata); #define __alloc_percpu_gfp(_size, _align, _gfp) \ alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp)) diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index b3b34f6670cf..4b5b83677e3f 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -17,10 +17,14 @@ #ifdef CONFIG_ARM_PMU /* - * The ARMv7 CPU PMU supports up to 32 event counters. + * The Armv7 and Armv8.8 or less CPU PMU supports up to 32 event counters. + * The Armv8.9/9.4 CPU PMU supports up to 33 event counters. */ +#ifdef CONFIG_ARM #define ARMPMU_MAX_HWEVENTS 32 - +#else +#define ARMPMU_MAX_HWEVENTS 33 +#endif /* * ARM PMU hw_event flags */ @@ -96,7 +100,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - int num_events; + DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS); bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 7867db04ec98..d698efba28a2 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -6,8 +6,9 @@ #ifndef __PERF_ARM_PMUV3_H #define __PERF_ARM_PMUV3_H -#define ARMV8_PMU_MAX_COUNTERS 32 -#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1) +#define ARMV8_PMU_MAX_GENERAL_COUNTERS 31 +#define ARMV8_PMU_CYCLE_IDX 31 +#define ARMV8_PMU_INSTR_IDX 32 /* Not accessible from AArch32 */ /* * Common architectural and microarchitectural event numbers. @@ -227,8 +228,10 @@ */ #define ARMV8_PMU_OVSR_P GENMASK(30, 0) #define ARMV8_PMU_OVSR_C BIT(31) +#define ARMV8_PMU_OVSR_F BIT_ULL(32) /* arm64 only */ /* Mask for writable bits is both P and C fields */ -#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C) +#define ARMV8_PMU_OVERFLOWED_MASK (ARMV8_PMU_OVSR_P | ARMV8_PMU_OVSR_C | \ + ARMV8_PMU_OVSR_F) /* * PMXEVTYPER: Event selection reg @@ -254,6 +257,7 @@ #define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */ #define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */ #define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */ +#define ARMV8_PMU_USERENR_UEN (1 << 4) /* Fine grained per counter access at EL0 */ /* Mask for writable bits */ #define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \ ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a8942277dda..cb99ec8c9e96 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -168,6 +168,15 @@ struct hw_perf_event { struct hw_perf_event_extra extra_reg; struct hw_perf_event_extra branch_reg; }; + struct { /* aux / Intel-PT */ + u64 aux_config; + /* + * For AUX area events, aux_paused cannot be a state + * flag because it can be updated asynchronously to + * state. + */ + unsigned int aux_paused; + }; struct { /* software */ struct hrtimer hrtimer; }; @@ -291,6 +300,20 @@ struct perf_event_pmu_context; #define PERF_PMU_CAP_NO_EXCLUDE 0x0040 #define PERF_PMU_CAP_AUX_OUTPUT 0x0080 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +#define PERF_PMU_CAP_AUX_PAUSE 0x0200 + +/** + * pmu::scope + */ +enum perf_pmu_scope { + PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_CORE, + PERF_PMU_SCOPE_DIE, + PERF_PMU_SCOPE_CLUSTER, + PERF_PMU_SCOPE_PKG, + PERF_PMU_SCOPE_SYS_WIDE, + PERF_PMU_MAX_SCOPE, +}; struct perf_output_handle; @@ -315,6 +338,11 @@ struct pmu { */ int capabilities; + /* + * PMU scope + */ + unsigned int scope; + int __percpu *pmu_disable_count; struct perf_cpu_pmu_context __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ @@ -363,6 +391,8 @@ struct pmu { #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ +#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */ +#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */ /* * Adds/Removes a counter to/from the PMU, can be done inside a @@ -402,6 +432,18 @@ struct pmu { * * ->start() with PERF_EF_RELOAD will reprogram the counter * value, must be preceded by a ->stop() with PERF_EF_UPDATE. + * + * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not + * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with + * PERF_EF_RESUME. + * + * ->start() with PERF_EF_RESUME will start as simply as possible but + * only if the counter is not otherwise stopped. Will not overlap + * another ->start() with PERF_EF_RESUME nor ->stop() with + * PERF_EF_PAUSE. + * + * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other + * ->stop()/->start() invocations, just not itself. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); @@ -615,10 +657,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and * cannot be a group leader. If an event with this flag is detached from the * group it is scheduled out and moved into an unrecoverable ERROR state. + * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the + * PMU scope where it is active. */ #define PERF_EV_CAP_SOFTWARE BIT(0) #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1) #define PERF_EV_CAP_SIBLING BIT(2) +#define PERF_EV_CAP_READ_SCOPE BIT(3) #define SWEVENT_HLIST_BITS 8 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) @@ -963,12 +1008,16 @@ struct perf_event_context { struct rcu_head rcu_head; /* - * Sum (event->pending_work + event->pending_work) + * The count of events for which using the switch-out fast path + * should be avoided. + * + * Sum (event->pending_work + events with + * (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))) * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. */ - local_t nr_pending; + local_t nr_no_switch_fast; }; struct perf_cpu_pmu_context { @@ -1602,13 +1651,7 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -static inline int perf_allow_kernel(struct perf_event_attr *attr) -{ - if (sysctl_perf_event_paranoid > 1 && !perfmon_capable()) - return -EACCES; - - return security_perf_event_open(attr, PERF_SECURITY_KERNEL); -} +int perf_allow_kernel(struct perf_event_attr *attr); static inline int perf_allow_cpu(struct perf_event_attr *attr) { @@ -1633,15 +1676,35 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record, struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); -#ifndef perf_misc_flags -# define perf_misc_flags(regs) \ +extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs); +extern unsigned long perf_instruction_pointer(struct perf_event *event, + struct pt_regs *regs); + +#ifndef perf_arch_misc_flags +# define perf_arch_misc_flags(regs) \ (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) -# define perf_instruction_pointer(regs) instruction_pointer(regs) +# define perf_arch_instruction_pointer(regs) instruction_pointer(regs) #endif #ifndef perf_arch_bpf_user_pt_regs # define perf_arch_bpf_user_pt_regs(regs) regs #endif +#ifndef perf_arch_guest_misc_flags +static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) +{ + unsigned long guest_state = perf_guest_state(); + + if (!(guest_state & PERF_GUEST_ACTIVE)) + return 0; + + if (guest_state & PERF_GUEST_USER) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_GUEST_KERNEL; +} +# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) +#endif + static inline bool has_branch_stack(struct perf_event *event) { return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; @@ -1657,6 +1720,13 @@ static inline bool has_aux(struct perf_event *event) return event->pmu->setup_aux; } +static inline bool has_aux_action(struct perf_event *event) +{ + return event->attr.aux_sample_size || + event->attr.aux_pause || + event->attr.aux_resume; +} + static inline bool is_write_backward(struct perf_event *event) { return !!event->attr.write_backward; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 207f0c83c8e9..3469c4b20105 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -12,45 +12,166 @@ #include <linux/page_ext.h> extern struct page_ext_operations page_alloc_tagging_ops; +extern unsigned long alloc_tag_ref_mask; +extern int alloc_tag_ref_offs; +extern struct alloc_tag_kernel_section kernel_tags; -static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext) +DECLARE_STATIC_KEY_FALSE(mem_profiling_compressed); + +typedef u16 pgalloc_tag_idx; + +union pgtag_ref_handle { + union codetag_ref *ref; /* reference in page extension */ + struct page *page; /* reference in page flags */ +}; + +/* Reserved indexes */ +#define CODETAG_ID_NULL 0 +#define CODETAG_ID_EMPTY 1 +#define CODETAG_ID_FIRST 2 + +#ifdef CONFIG_MODULES + +extern struct alloc_tag_module_section module_tags; + +static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx) { - return (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); + return &module_tags.first_tag[idx - kernel_tags.count]; } -static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref) +static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag) { - return (void *)ref - page_alloc_tagging_ops.offset; + return CODETAG_ID_FIRST + kernel_tags.count + (tag - module_tags.first_tag); } -/* Should be called only if mem_alloc_profiling_enabled() */ -static inline union codetag_ref *get_page_tag_ref(struct page *page) +#else /* CONFIG_MODULES */ + +static inline struct alloc_tag *module_idx_to_tag(pgalloc_tag_idx idx) { - if (page) { - struct page_ext *page_ext = page_ext_get(page); + pr_warn("invalid page tag reference %lu\n", (unsigned long)idx); + return NULL; +} + +static inline pgalloc_tag_idx module_tag_to_idx(struct alloc_tag *tag) +{ + pr_warn("invalid page tag 0x%lx\n", (unsigned long)tag); + return CODETAG_ID_NULL; +} + +#endif /* CONFIG_MODULES */ - if (page_ext) - return codetag_ref_from_page_ext(page_ext); +static inline void idx_to_ref(pgalloc_tag_idx idx, union codetag_ref *ref) +{ + switch (idx) { + case (CODETAG_ID_NULL): + ref->ct = NULL; + break; + case (CODETAG_ID_EMPTY): + set_codetag_empty(ref); + break; + default: + idx -= CODETAG_ID_FIRST; + ref->ct = idx < kernel_tags.count ? + &kernel_tags.first_tag[idx].ct : + &module_idx_to_tag(idx)->ct; + break; } - return NULL; } -static inline void put_page_tag_ref(union codetag_ref *ref) +static inline pgalloc_tag_idx ref_to_idx(union codetag_ref *ref) +{ + struct alloc_tag *tag; + + if (!ref->ct) + return CODETAG_ID_NULL; + + if (is_codetag_empty(ref)) + return CODETAG_ID_EMPTY; + + tag = ct_to_alloc_tag(ref->ct); + if (tag >= kernel_tags.first_tag && tag < kernel_tags.first_tag + kernel_tags.count) + return CODETAG_ID_FIRST + (tag - kernel_tags.first_tag); + + return module_tag_to_idx(tag); +} + + + +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline bool get_page_tag_ref(struct page *page, union codetag_ref *ref, + union pgtag_ref_handle *handle) +{ + if (!page) + return false; + + if (static_key_enabled(&mem_profiling_compressed)) { + pgalloc_tag_idx idx; + + idx = (page->flags >> alloc_tag_ref_offs) & alloc_tag_ref_mask; + idx_to_ref(idx, ref); + handle->page = page; + } else { + struct page_ext *page_ext; + union codetag_ref *tmp; + + page_ext = page_ext_get(page); + if (!page_ext) + return false; + + tmp = (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); + ref->ct = tmp->ct; + handle->ref = tmp; + } + + return true; +} + +static inline void put_page_tag_ref(union pgtag_ref_handle handle) { - if (WARN_ON(!ref)) + if (WARN_ON(!handle.ref)) return; - page_ext_put(page_ext_from_codetag_ref(ref)); + if (!static_key_enabled(&mem_profiling_compressed)) + page_ext_put((void *)handle.ref - page_alloc_tagging_ops.offset); +} + +static inline void update_page_tag_ref(union pgtag_ref_handle handle, union codetag_ref *ref) +{ + if (static_key_enabled(&mem_profiling_compressed)) { + struct page *page = handle.page; + unsigned long old_flags; + unsigned long flags; + unsigned long idx; + + if (WARN_ON(!page || !ref)) + return; + + idx = (unsigned long)ref_to_idx(ref); + idx = (idx & alloc_tag_ref_mask) << alloc_tag_ref_offs; + do { + old_flags = READ_ONCE(page->flags); + flags = old_flags; + flags &= ~(alloc_tag_ref_mask << alloc_tag_ref_offs); + flags |= idx; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); + } else { + if (WARN_ON(!handle.ref || !ref)) + return; + + handle.ref->ct = ref->ct; + } } static inline void clear_page_tag_ref(struct page *page) { if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); + if (get_page_tag_ref(page, &ref, &handle)) { + set_codetag_empty(&ref); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } @@ -59,11 +180,13 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { - alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr); - put_page_tag_ref(ref); + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } @@ -71,57 +194,30 @@ static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) { if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); + union pgtag_ref_handle handle; + union codetag_ref ref; - if (ref) { - alloc_tag_sub(ref, PAGE_SIZE * nr); - put_page_tag_ref(ref); + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub(&ref, PAGE_SIZE * nr); + update_page_tag_ref(handle, &ref); + put_page_tag_ref(handle); } } } -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) -{ - int i; - struct page_ext *first_page_ext; - struct page_ext *page_ext; - union codetag_ref *ref; - struct alloc_tag *tag; - - if (!mem_alloc_profiling_enabled()) - return; - - first_page_ext = page_ext = page_ext_get(page); - if (unlikely(!page_ext)) - return; - - ref = codetag_ref_from_page_ext(page_ext); - if (!ref->ct) - goto out; - - tag = ct_to_alloc_tag(ref->ct); - page_ext = page_ext_next(page_ext); - for (i = 1; i < nr; i++) { - /* Set new reference to point to the original tag */ - alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag); - page_ext = page_ext_next(page_ext); - } -out: - page_ext_put(first_page_ext); -} - static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { struct alloc_tag *tag = NULL; if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - alloc_tag_sub_check(ref); - if (ref) { - if (ref->ct) - tag = ct_to_alloc_tag(ref->ct); - put_page_tag_ref(ref); + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub_check(&ref); + if (ref.ct) + tag = ct_to_alloc_tag(ref.ct); + put_page_tag_ref(handle); } } @@ -134,17 +230,22 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); } +void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); +void pgalloc_tag_swap(struct folio *new, struct folio *old); + +void __init alloc_tag_sec_init(void); + #else /* CONFIG_MEM_ALLOC_PROFILING */ -static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } -static inline void put_page_tag_ref(union codetag_ref *ref) {} static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {} static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} +static inline void alloc_tag_sec_init(void) {} +static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} +static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2a6a3cccfc36..adef9d6e9b1b 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -90,6 +90,27 @@ static inline unsigned long pud_index(unsigned long address) #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #endif +#ifndef kernel_pte_init +static inline void kernel_pte_init(void *addr) +{ +} +#define kernel_pte_init kernel_pte_init +#endif + +#ifndef pmd_init +static inline void pmd_init(void *addr) +{ +} +#define pmd_init pmd_init +#endif + +#ifndef pud_init +static inline void pud_init(void *addr) +{ +} +#define pud_init pud_init +#endif + #ifndef pte_offset_kernel static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) { @@ -447,6 +468,12 @@ static inline void arch_check_zapped_pmd(struct vm_area_struct *vma, } #endif +#ifndef arch_check_zapped_pud +static inline void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud) +{ +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, @@ -1050,44 +1077,6 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) } #endif -/* - * Use set_p*_safe(), and elide TLB flushing, when confident that *no* - * TLB flush will be required as a result of the "set". For example, use - * in scenarios where it is known ahead of time that the routine is - * setting non-present entries, or re-setting an existing entry to the - * same value. Otherwise, use the typical "set" helpers and flush the - * TLB. - */ -#define set_pte_safe(ptep, pte) \ -({ \ - WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \ - set_pte(ptep, pte); \ -}) - -#define set_pmd_safe(pmdp, pmd) \ -({ \ - WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \ - set_pmd(pmdp, pmd); \ -}) - -#define set_pud_safe(pudp, pud) \ -({ \ - WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \ - set_pud(pudp, pud); \ -}) - -#define set_p4d_safe(p4dp, p4d) \ -({ \ - WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ - set_p4d(p4dp, p4d); \ -}) - -#define set_pgd_safe(pgdp, pgd) \ -({ \ - WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ - set_pgd(pgdp, pgd); \ -}) - #ifndef __HAVE_ARCH_DO_SWAP_PAGE static inline void arch_do_swap_page_nr(struct mm_struct *mm, struct vm_area_struct *vma, @@ -1950,6 +1939,18 @@ typedef unsigned int pgtbl_mod_mask; #define MAX_PTRS_PER_P4D PTRS_PER_P4D #endif +#ifndef pte_pgprot +#define pte_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pmd_pgprot +#define pmd_pgprot(x) ((pgprot_t) {0}) +#endif + +#ifndef pud_pgprot +#define pud_pgprot(x) ((pgprot_t) {0}) +#endif + /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: diff --git a/include/linux/phy.h b/include/linux/phy.h index 6b7d40d49129..563c46205685 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -554,6 +554,9 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. + * @phyindex: Unique id across the phy's parent tree of phys to address the PHY + * from userspace, similar to ifindex. A zero index means the PHY + * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -598,8 +601,8 @@ struct macsec_ops; * @adv_old: Saved advertised while power saving for WoL * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes - * @eee_enabled: Flag indicating whether the EEE feature is enabled * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_active: phylib private state, indicating that EEE has been negotiated * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host @@ -656,6 +659,7 @@ struct phy_device { struct device_link *devlink; + u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; @@ -717,16 +721,15 @@ struct phy_device { /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); - bool eee_enabled; - - /* Host supported PHY interface types. Should be ignored if empty. */ - DECLARE_PHY_INTERFACE_MASK(host_interfaces); - /* Energy efficient ethernet modes which should be prohibited */ - u32 eee_broken_modes; + __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); bool enable_tx_lpi; + bool eee_active; struct eee_config eee_cfg; + /* Host supported PHY interface types. Should be ignored if empty. */ + DECLARE_PHY_INTERFACE_MASK(host_interfaces); + #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; @@ -873,8 +876,9 @@ struct phy_plca_status { /* Modes for PHY LED configuration */ enum phy_led_modes { - PHY_LED_ACTIVE_LOW = 0, - PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + PHY_LED_ACTIVE_HIGH = 0, + PHY_LED_ACTIVE_LOW = 1, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 2, /* keep it last */ __PHY_LED_MODES_NUM, @@ -1256,9 +1260,20 @@ size_t phy_speeds(unsigned int *speeds, size_t size, unsigned long *mask); void of_set_phy_supported(struct phy_device *phydev); void of_set_phy_eee_broken(struct phy_device *phydev); +void of_set_phy_timing_role(struct phy_device *phydev); int phy_speed_down_core(struct phy_device *phydev); /** + * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised. + * @phydev: The phy_device struct + * @link_mode: The broken EEE mode + */ +static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode) +{ + linkmode_set_bit(link_mode, phydev->eee_broken_modes); +} + +/** * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct */ @@ -1374,12 +1389,13 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); * @regnum: The register on the MMD to read * @val: Variable to read the register into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * @sleep_before_read: if it is true, sleep @sleep_us before read. - * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either * case, the last read value at @args is stored in @val. Must not * be called from atomic context if sleep_us or timeout_us are used. */ @@ -1777,6 +1793,8 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); +int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); +void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); void phy_sfp_detach(void *upstream, struct sfp_bus *bus); int phy_sfp_probe(struct phy_device *phydev, @@ -1877,7 +1895,6 @@ int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart); -int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); @@ -1945,7 +1962,6 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); -int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); diff --git a/include/linux/phy/phy-sun4i-usb.h b/include/linux/phy/phy-sun4i-usb.h index 91eb755ee73b..f3e7b13608e4 100644 --- a/include/linux/phy/phy-sun4i-usb.h +++ b/include/linux/phy/phy-sun4i-usb.h @@ -11,7 +11,7 @@ /** * sun4i_usb_phy_set_squelch_detect() - Enable/disable squelch detect * @phy: reference to a sun4i usb phy - * @enabled: wether to enable or disable squelch detect + * @enabled: whether to enable or disable squelch detect */ void sun4i_usb_phy_set_squelch_detect(struct phy *phy, bool enabled); diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h new file mode 100644 index 000000000000..68a59e25821c --- /dev/null +++ b/include/linux/phy_link_topology.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PHY device list allow maintaining a list of PHY devices that are + * part of a netdevice's link topology. PHYs can for example be chained, + * as is the case when using a PHY that exposes an SFP module, on which an + * SFP transceiver that embeds a PHY is connected. + * + * This list can then be used by userspace to leverage individual PHY + * capabilities. + */ +#ifndef __PHY_LINK_TOPOLOGY_H +#define __PHY_LINK_TOPOLOGY_H + +#include <linux/ethtool.h> +#include <linux/netdevice.h> + +struct xarray; +struct phy_device; +struct sfp_bus; + +struct phy_link_topology { + struct xarray phys; + u32 next_phy_index; +}; + +struct phy_device_node { + enum phy_upstream upstream_type; + + union { + struct net_device *netdev; + struct phy_device *phydev; + } upstream; + + struct sfp_bus *parent_sfp_bus; + + struct phy_device *phy; +}; + +#if IS_ENABLED(CONFIG_PHYLIB) +int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream); + +void phy_link_topo_del_phy(struct net_device *dev, struct phy_device *phy); + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + struct phy_link_topology *topo = dev->link_topo; + struct phy_device_node *pdn; + + if (!topo) + return NULL; + + pdn = xa_load(&topo->phys, phyindex); + if (pdn) + return pdn->phy; + + return NULL; +} + +#else +static inline int phy_link_topo_add_phy(struct net_device *dev, + struct phy_device *phy, + enum phy_upstream upt, void *upstream) +{ + return 0; +} + +static inline void phy_link_topo_del_phy(struct net_device *dev, + struct phy_device *phy) +{ +} + +static inline struct phy_device * +phy_link_topo_get_phy(struct net_device *dev, u32 phyindex) +{ + return NULL; +} +#endif + +#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2381e07429a2..5c01048860c4 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -598,6 +598,8 @@ int phylink_fwnode_phy_connect(struct phylink *pl, const struct fwnode_handle *fwnode, u32 flags); void phylink_disconnect_phy(struct phylink *); +int phylink_set_fixed_link(struct phylink *, + const struct phylink_link_state *); void phylink_mac_change(struct phylink *, bool up); void phylink_pcs_change(struct phylink_pcs *, bool up); diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index a65d3d078e58..53cfde98433d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -81,6 +81,8 @@ struct pinctrl_map; * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, * schmitt-trigger mode is disabled. + * @PIN_CONFIG_INPUT_SCHMITT_UV: this will configure an input pin to run in + * schmitt-trigger mode. The argument is in uV. * @PIN_CONFIG_MODE_LOW_POWER: this will configure the pin for low power * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 @@ -132,6 +134,7 @@ enum pin_config_param { PIN_CONFIG_INPUT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, PIN_CONFIG_INPUT_SCHMITT_ENABLE, + PIN_CONFIG_INPUT_SCHMITT_UV, PIN_CONFIG_MODE_LOW_POWER, PIN_CONFIG_MODE_PWM, PIN_CONFIG_OUTPUT, diff --git a/include/linux/platform_data/ad5449.h b/include/linux/platform_data/ad5449.h deleted file mode 100644 index d687ef5726c2..000000000000 --- a/include/linux/platform_data/ad5449.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * AD5415, AD5426, AD5429, AD5432, AD5439, AD5443, AD5449 Digital to Analog - * Converter driver. - * - * Copyright 2012 Analog Devices Inc. - * Author: Lars-Peter Clausen <lars@metafoo.de> - */ - -#ifndef __LINUX_PLATFORM_DATA_AD5449_H__ -#define __LINUX_PLATFORM_DATA_AD5449_H__ - -/** - * enum ad5449_sdo_mode - AD5449 SDO pin configuration - * @AD5449_SDO_DRIVE_FULL: Drive the SDO pin with full strength. - * @AD5449_SDO_DRIVE_WEAK: Drive the SDO pin with not full strength. - * @AD5449_SDO_OPEN_DRAIN: Operate the SDO pin in open-drain mode. - * @AD5449_SDO_DISABLED: Disable the SDO pin, in this mode it is not possible to - * read back from the device. - */ -enum ad5449_sdo_mode { - AD5449_SDO_DRIVE_FULL = 0x0, - AD5449_SDO_DRIVE_WEAK = 0x1, - AD5449_SDO_OPEN_DRAIN = 0x2, - AD5449_SDO_DISABLED = 0x3, -}; - -/** - * struct ad5449_platform_data - Platform data for the ad5449 DAC driver - * @sdo_mode: SDO pin mode - * @hardware_clear_to_midscale: Whether asserting the hardware CLR pin sets the - * outputs to midscale (true) or to zero scale(false). - */ -struct ad5449_platform_data { - enum ad5449_sdo_mode sdo_mode; - bool hardware_clear_to_midscale; -}; - -#endif diff --git a/include/linux/platform_data/amd_qdma.h b/include/linux/platform_data/amd_qdma.h new file mode 100644 index 000000000000..576d952f97ed --- /dev/null +++ b/include/linux/platform_data/amd_qdma.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _PLATDATA_AMD_QDMA_H +#define _PLATDATA_AMD_QDMA_H + +#include <linux/dmaengine.h> + +/** + * struct qdma_queue_info - DMA queue information. This information is used to + * match queue when DMA channel is requested + * @dir: Channel transfer direction + */ +struct qdma_queue_info { + enum dma_transfer_direction dir; +}; + +#define QDMA_FILTER_PARAM(qinfo) ((void *)(qinfo)) + +struct dma_slave_map; + +/** + * struct qdma_platdata - Platform specific data for QDMA engine + * @max_mm_channels: Maximum number of MM DMA channels in each direction + * @device_map: DMA slave map + * @irq_index: The index of first IRQ + */ +struct qdma_platdata { + u32 max_mm_channels; + u32 irq_index; + struct dma_slave_map *device_map; +}; + +#endif /* _PLATDATA_AMD_QDMA_H */ diff --git a/include/linux/platform_data/asoc-s3c.h b/include/linux/platform_data/asoc-s3c.h index f9c00f839e9f..085dd8e8af76 100644 --- a/include/linux/platform_data/asoc-s3c.h +++ b/include/linux/platform_data/asoc-s3c.h @@ -13,8 +13,6 @@ #include <linux/dmaengine.h> -extern void s3c64xx_ac97_setup_gpio(int); - struct samsung_i2s_type { /* If the Primary DAI has 5.1 Channels */ #define QUIRK_PRI_6CHAN (1 << 0) diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index e574b790be6f..b3c4993e656e 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -1312,6 +1312,38 @@ enum ec_feature_code { * The EC supports the AP composing VDMs for us to send. */ EC_FEATURE_TYPEC_AP_VDM_SEND = 46, + /* + * The EC supports system safe mode panic recovery. + */ + EC_FEATURE_SYSTEM_SAFE_MODE = 47, + /* + * The EC will reboot on runtime assertion failures. + */ + EC_FEATURE_ASSERT_REBOOTS = 48, + /* + * The EC image is built with tokenized logging enabled. + */ + EC_FEATURE_TOKENIZED_LOGGING = 49, + /* + * The EC supports triggering an STB dump. + */ + EC_FEATURE_AMD_STB_DUMP = 50, + /* + * The EC supports memory dump commands. + */ + EC_FEATURE_MEMORY_DUMP = 51, + /* + * The EC supports DP2.1 capability + */ + EC_FEATURE_TYPEC_DP2_1 = 52, + /* + * The MCU is System Companion Processor Core 1 + */ + EC_FEATURE_SCP_C1 = 53, + /* + * The EC supports UCSI PPM. + */ + EC_FEATURE_UCSI_PPM = 54, }; #define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32) diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h deleted file mode 100644 index eb9805bb3fe8..000000000000 --- a/include/linux/platform_data/dma-ep93xx.h +++ /dev/null @@ -1,94 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_ARCH_DMA_H -#define __ASM_ARCH_DMA_H - -#include <linux/types.h> -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> - -/* - * M2P channels. - * - * Note that these values are also directly used for setting the PPALLOC - * register. - */ -#define EP93XX_DMA_I2S1 0 -#define EP93XX_DMA_I2S2 1 -#define EP93XX_DMA_AAC1 2 -#define EP93XX_DMA_AAC2 3 -#define EP93XX_DMA_AAC3 4 -#define EP93XX_DMA_I2S3 5 -#define EP93XX_DMA_UART1 6 -#define EP93XX_DMA_UART2 7 -#define EP93XX_DMA_UART3 8 -#define EP93XX_DMA_IRDA 9 -/* M2M channels */ -#define EP93XX_DMA_SSP 10 -#define EP93XX_DMA_IDE 11 - -/** - * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine - * @port: peripheral which is requesting the channel - * @direction: TX/RX channel - * @name: optional name for the channel, this is displayed in /proc/interrupts - * - * This information is passed as private channel parameter in a filter - * function. Note that this is only needed for slave/cyclic channels. For - * memcpy channels %NULL data should be passed. - */ -struct ep93xx_dma_data { - int port; - enum dma_transfer_direction direction; - const char *name; -}; - -/** - * struct ep93xx_dma_chan_data - platform specific data for a DMA channel - * @name: name of the channel, used for getting the right clock for the channel - * @base: mapped registers - * @irq: interrupt number used by this channel - */ -struct ep93xx_dma_chan_data { - const char *name; - void __iomem *base; - int irq; -}; - -/** - * struct ep93xx_dma_platform_data - platform data for the dmaengine driver - * @channels: array of channels which are passed to the driver - * @num_channels: number of channels in the array - * - * This structure is passed to the DMA engine driver via platform data. For - * M2P channels, contract is that even channels are for TX and odd for RX. - * There is no requirement for the M2M channels. - */ -struct ep93xx_dma_platform_data { - struct ep93xx_dma_chan_data *channels; - size_t num_channels; -}; - -static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan) -{ - return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p"); -} - -/** - * ep93xx_dma_chan_direction - returns direction the channel can be used - * @chan: channel - * - * This function can be used in filter functions to find out whether the - * channel supports given DMA direction. Only M2P channels have such - * limitation, for M2M channels the direction is configurable. - */ -static inline enum dma_transfer_direction -ep93xx_dma_chan_direction(struct dma_chan *chan) -{ - if (!ep93xx_dma_chan_is_m2p(chan)) - return DMA_TRANS_NONE; - - /* even channels are for TX, odd for RX */ - return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; -} - -#endif /* __ASM_ARCH_DMA_H */ diff --git a/include/linux/platform_data/eth-ep93xx.h b/include/linux/platform_data/eth-ep93xx.h deleted file mode 100644 index 8eef637a804d..000000000000 --- a/include/linux/platform_data/eth-ep93xx.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_PLATFORM_DATA_ETH_EP93XX -#define _LINUX_PLATFORM_DATA_ETH_EP93XX - -struct ep93xx_eth_data { - unsigned char dev_addr[6]; - unsigned char phy_id; -}; - -#endif diff --git a/include/linux/platform_data/gpio-ath79.h b/include/linux/platform_data/gpio-ath79.h deleted file mode 100644 index 3ea6dd942c27..000000000000 --- a/include/linux/platform_data/gpio-ath79.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Atheros AR7XXX/AR9XXX GPIO controller platform data - * - * Copyright (C) 2015 Alban Bedel <albeu@free.fr> - */ - -#ifndef __LINUX_PLATFORM_DATA_GPIO_ATH79_H -#define __LINUX_PLATFORM_DATA_GPIO_ATH79_H - -struct ath79_gpio_platform_data { - unsigned ngpios; - bool oe_inverted; -}; - -#endif diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h deleted file mode 100644 index b82e44662efe..000000000000 --- a/include/linux/platform_data/gpio-davinci.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * DaVinci GPIO Platform Related Defines - * - * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com/ - */ - -#ifndef __DAVINCI_GPIO_PLATFORM_H -#define __DAVINCI_GPIO_PLATFORM_H - -struct davinci_gpio_platform_data { - bool no_auto_base; - u32 base; - u32 ngpio; - u32 gpio_unbanked; -}; - -/* Convert GPIO signal to GPIO pin number */ -#define GPIO_TO_PIN(bank, gpio) (16 * (bank) + (gpio)) - -#endif diff --git a/include/linux/platform_data/hwmon-s3c.h b/include/linux/platform_data/hwmon-s3c.h index 1707ad4147df..7d21e0c41037 100644 --- a/include/linux/platform_data/hwmon-s3c.h +++ b/include/linux/platform_data/hwmon-s3c.h @@ -33,14 +33,4 @@ struct s3c_hwmon_pdata { struct s3c_hwmon_chcfg *in[8]; }; -/** - * s3c_hwmon_set_platdata - Set platform data for S3C HWMON device - * @pd: Platform data to register to device. - * - * Register the given platform data for use with the S3C HWMON device. - * The call will copy the platform data, so the board definitions can - * make the structure itself __initdata. - */ -extern void __init s3c_hwmon_set_platdata(struct s3c_hwmon_pdata *pd); - #endif /* __HWMON_S3C_H__ */ diff --git a/include/linux/platform_data/keypad-ep93xx.h b/include/linux/platform_data/keypad-ep93xx.h deleted file mode 100644 index 3054fced8509..000000000000 --- a/include/linux/platform_data/keypad-ep93xx.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KEYPAD_EP93XX_H -#define __KEYPAD_EP93XX_H - -struct matrix_keymap_data; - -/* flags for the ep93xx_keypad driver */ -#define EP93XX_KEYPAD_DISABLE_3_KEY (1<<0) /* disable 3-key reset */ -#define EP93XX_KEYPAD_DIAG_MODE (1<<1) /* diagnostic mode */ -#define EP93XX_KEYPAD_BACK_DRIVE (1<<2) /* back driving mode */ -#define EP93XX_KEYPAD_TEST_MODE (1<<3) /* scan only column 0 */ -#define EP93XX_KEYPAD_AUTOREPEAT (1<<4) /* enable key autorepeat */ - -/** - * struct ep93xx_keypad_platform_data - platform specific device structure - * @keymap_data: pointer to &matrix_keymap_data - * @debounce: debounce start count; terminal count is 0xff - * @prescale: row/column counter pre-scaler load value - * @flags: see above - */ -struct ep93xx_keypad_platform_data { - struct matrix_keymap_data *keymap_data; - unsigned int debounce; - unsigned int prescale; - unsigned int flags; - unsigned int clk_rate; -}; - -#define EP93XX_MATRIX_ROWS (8) -#define EP93XX_MATRIX_COLS (8) - -#endif /* __KEYPAD_EP93XX_H */ diff --git a/include/linux/platform_data/keyscan-davinci.h b/include/linux/platform_data/keyscan-davinci.h deleted file mode 100644 index 260d596ba0af..000000000000 --- a/include/linux/platform_data/keyscan-davinci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Texas Instruments, Inc - * - * Author: Miguel Aguilar <miguel.aguilar@ridgerun.com> - */ - -#ifndef DAVINCI_KEYSCAN_H -#define DAVINCI_KEYSCAN_H - -#include <linux/io.h> - -enum davinci_matrix_types { - DAVINCI_KEYSCAN_MATRIX_4X4, - DAVINCI_KEYSCAN_MATRIX_5X3, -}; - -struct davinci_ks_platform_data { - int (*device_enable)(struct device *dev); - unsigned short *keymap; - u32 keymapsize; - u8 rep:1; - u8 strobe; - u8 interval; - u8 matrix_type; -}; - -#endif - diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h deleted file mode 100644 index 65bfdb4fdc15..000000000000 --- a/include/linux/platform_data/max6639.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_MAX6639_H -#define _LINUX_MAX6639_H - -#include <linux/types.h> - -/* platform data for the MAX6639 temperature sensor and fan control */ - -struct max6639_platform_data { - bool pwm_polarity; /* Polarity low (0) or high (1, default) */ - int ppr; /* Pulses per rotation 1..4 (default == 2) */ - int rpm_range; /* 2000, 4000 (default), 8000 or 16000 */ -}; - -#endif /* _LINUX_MAX6639_H */ diff --git a/include/linux/platform_data/max6697.h b/include/linux/platform_data/max6697.h deleted file mode 100644 index 6fbb70005541..000000000000 --- a/include/linux/platform_data/max6697.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * max6697.h - * Copyright (c) 2012 Guenter Roeck <linux@roeck-us.net> - */ - -#ifndef MAX6697_H -#define MAX6697_H - -#include <linux/types.h> - -/* - * For all bit masks: - * bit 0: local temperature - * bit 1..7: remote temperatures - */ -struct max6697_platform_data { - bool smbus_timeout_disable; /* set to disable SMBus timeouts */ - bool extended_range_enable; /* set to enable extended temp range */ - bool beta_compensation; /* set to enable beta compensation */ - u8 alert_mask; /* set bit to 1 to disable alert */ - u8 over_temperature_mask; /* set bit to 1 to disable */ - u8 resistance_cancellation; /* set bit to 0 to disable - * bit mask for MAX6581, - * boolean for other chips - */ - u8 ideality_mask; /* set bit to 0 to disable */ - u8 ideality_value; /* transistor ideality as per - * MAX6581 datasheet - */ -}; - -#endif /* MAX6697_H */ diff --git a/include/linux/platform_data/media/omap4iss.h b/include/linux/platform_data/media/omap4iss.h deleted file mode 100644 index 2a511a8fcda7..000000000000 --- a/include/linux/platform_data/media/omap4iss.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ARCH_ARM_PLAT_OMAP4_ISS_H -#define ARCH_ARM_PLAT_OMAP4_ISS_H - -#include <linux/i2c.h> - -struct iss_device; - -enum iss_interface_type { - ISS_INTERFACE_CSI2A_PHY1, - ISS_INTERFACE_CSI2B_PHY2, -}; - -/** - * struct iss_csiphy_lane: CSI2 lane position and polarity - * @pos: position of the lane - * @pol: polarity of the lane - */ -struct iss_csiphy_lane { - u8 pos; - u8 pol; -}; - -#define ISS_CSIPHY1_NUM_DATA_LANES 4 -#define ISS_CSIPHY2_NUM_DATA_LANES 1 - -/** - * struct iss_csiphy_lanes_cfg - CSI2 lane configuration - * @data: Configuration of one or two data lanes - * @clk: Clock lane configuration - */ -struct iss_csiphy_lanes_cfg { - struct iss_csiphy_lane data[ISS_CSIPHY1_NUM_DATA_LANES]; - struct iss_csiphy_lane clk; -}; - -/** - * struct iss_csi2_platform_data - CSI2 interface platform data - * @crc: Enable the cyclic redundancy check - * @vpclk_div: Video port output clock control - */ -struct iss_csi2_platform_data { - unsigned crc:1; - unsigned vpclk_div:2; - struct iss_csiphy_lanes_cfg lanecfg; -}; - -struct iss_subdev_i2c_board_info { - struct i2c_board_info *board_info; - int i2c_adapter_id; -}; - -struct iss_v4l2_subdevs_group { - struct iss_subdev_i2c_board_info *subdevs; - enum iss_interface_type interface; - union { - struct iss_csi2_platform_data csi2; - } bus; /* gcc < 4.6.0 chokes on anonymous union initializers */ -}; - -struct iss_platform_data { - struct iss_v4l2_subdevs_group *subdevs; - void (*set_constraints)(struct iss_device *iss, bool enable); -}; - -#endif diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index 8c659db4da6b..0e0e8fe6975f 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -27,7 +27,9 @@ enum ksz_chip_id { KSZ8795_CHIP_ID = 0x8795, KSZ8794_CHIP_ID = 0x8794, KSZ8765_CHIP_ID = 0x8765, - KSZ8830_CHIP_ID = 0x8830, + KSZ88X3_CHIP_ID = 0x8830, + KSZ8864_CHIP_ID = 0x8864, + KSZ8895_CHIP_ID = 0x8895, KSZ9477_CHIP_ID = 0x00947700, KSZ9896_CHIP_ID = 0x00989600, KSZ9897_CHIP_ID = 0x00989700, @@ -40,6 +42,7 @@ enum ksz_chip_id { LAN9372_CHIP_ID = 0x00937200, LAN9373_CHIP_ID = 0x00937300, LAN9374_CHIP_ID = 0x00937400, + LAN9646_CHIP_ID = 0x00964600, }; struct ksz_platform_data { diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h deleted file mode 100644 index a49826214a39..000000000000 --- a/include/linux/platform_data/mtd-davinci-aemif.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * TI DaVinci AEMIF support - * - * Copyright 2010 (C) Texas Instruments, Inc. https://www.ti.com/ - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ -#ifndef _MACH_DAVINCI_AEMIF_H -#define _MACH_DAVINCI_AEMIF_H - -#include <linux/platform_device.h> - -#define NRCSR_OFFSET 0x00 -#define AWCCR_OFFSET 0x04 -#define A1CR_OFFSET 0x10 - -#define ACR_ASIZE_MASK 0x3 -#define ACR_EW_MASK BIT(30) -#define ACR_SS_MASK BIT(31) - -/* All timings in nanoseconds */ -struct davinci_aemif_timing { - u8 wsetup; - u8 wstrobe; - u8 whold; - - u8 rsetup; - u8 rstrobe; - u8 rhold; - - u8 ta; -}; - -#endif diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h deleted file mode 100644 index dd474dd44848..000000000000 --- a/include/linux/platform_data/mtd-davinci.h +++ /dev/null @@ -1,88 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mach-davinci/nand.h - * - * Copyright © 2006 Texas Instruments. - * - * Ported to 2.6.23 Copyright © 2008 by - * Sander Huijsen <Shuijsen@optelecom-nkf.com> - * Troy Kisky <troy.kisky@boundarydevices.com> - * Dirk Behme <Dirk.Behme@gmail.com> - * - * -------------------------------------------------------------------------- - */ - -#ifndef __ARCH_ARM_DAVINCI_NAND_H -#define __ARCH_ARM_DAVINCI_NAND_H - -#include <linux/mtd/rawnand.h> - -#define NANDFCR_OFFSET 0x60 -#define NANDFSR_OFFSET 0x64 -#define NANDF1ECC_OFFSET 0x70 - -/* 4-bit ECC syndrome registers */ -#define NAND_4BIT_ECC_LOAD_OFFSET 0xbc -#define NAND_4BIT_ECC1_OFFSET 0xc0 -#define NAND_4BIT_ECC2_OFFSET 0xc4 -#define NAND_4BIT_ECC3_OFFSET 0xc8 -#define NAND_4BIT_ECC4_OFFSET 0xcc -#define NAND_ERR_ADD1_OFFSET 0xd0 -#define NAND_ERR_ADD2_OFFSET 0xd4 -#define NAND_ERR_ERRVAL1_OFFSET 0xd8 -#define NAND_ERR_ERRVAL2_OFFSET 0xdc - -/* NOTE: boards don't need to use these address bits - * for ALE/CLE unless they support booting from NAND. - * They're used unless platform data overrides them. - */ -#define MASK_ALE 0x08 -#define MASK_CLE 0x10 - -struct davinci_nand_pdata { /* platform_data */ - uint32_t mask_ale; - uint32_t mask_cle; - - /* - * 0-indexed chip-select number of the asynchronous - * interface to which the NAND device has been connected. - * - * So, if you have NAND connected to CS3 of DA850, you - * will pass '1' here. Since the asynchronous interface - * on DA850 starts from CS2. - */ - uint32_t core_chipsel; - - /* for packages using two chipselects */ - uint32_t mask_chipsel; - - /* board's default static partition info */ - struct mtd_partition *parts; - unsigned nr_parts; - - /* none == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!) - * soft == NAND_ECC_ENGINE_TYPE_SOFT - * else == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits - * - * All DaVinci-family chips support 1-bit hardware ECC. - * Newer ones also support 4-bit ECC, but are awkward - * using it with large page chips. - */ - enum nand_ecc_engine_type engine_type; - enum nand_ecc_placement ecc_placement; - u8 ecc_bits; - - /* e.g. NAND_BUSWIDTH_16 */ - unsigned options; - /* e.g. NAND_BBT_USE_FLASH */ - unsigned bbt_options; - - /* Main and mirror bbt descriptor overrides */ - struct nand_bbt_descr *bbt_td; - struct nand_bbt_descr *bbt_md; - - /* Access timings */ - struct davinci_aemif_timing *timing; -}; - -#endif /* __ARCH_ARM_DAVINCI_NAND_H */ diff --git a/include/linux/platform_data/sa11x0-serial.h b/include/linux/platform_data/sa11x0-serial.h index 8b79ab08af45..a88096bc74e4 100644 --- a/include/linux/platform_data/sa11x0-serial.h +++ b/include/linux/platform_data/sa11x0-serial.h @@ -10,7 +10,6 @@ #define SA11X0_SERIAL_H struct uart_port; -struct uart_info; /* * This is a temporary structure for registering these diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h deleted file mode 100644 index b439f2a896e0..000000000000 --- a/include/linux/platform_data/spi-ep93xx.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_EP93XX_SPI_H -#define __ASM_MACH_EP93XX_SPI_H - -struct spi_device; - -/** - * struct ep93xx_spi_info - EP93xx specific SPI descriptor - * @use_dma: use DMA for the transfers - */ -struct ep93xx_spi_info { - bool use_dma; -}; - -#endif /* __ASM_MACH_EP93XX_SPI_H */ diff --git a/include/linux/platform_data/ti-aemif.h b/include/linux/platform_data/ti-aemif.h deleted file mode 100644 index 77625251df07..000000000000 --- a/include/linux/platform_data/ti-aemif.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * TI DaVinci AEMIF platform glue. - * - * Copyright (C) 2017 BayLibre SAS - * - * Author: - * Bartosz Golaszewski <bgolaszewski@baylibre.com> - */ - -#ifndef __TI_DAVINCI_AEMIF_DATA_H__ -#define __TI_DAVINCI_AEMIF_DATA_H__ - -#include <linux/of_platform.h> - -/** - * struct aemif_abus_data - Async bus configuration parameters. - * - * @cs - Chip-select number. - */ -struct aemif_abus_data { - u32 cs; -}; - -/** - * struct aemif_platform_data - Data to set up the TI aemif driver. - * - * @dev_lookup: of_dev_auxdata passed to of_platform_populate() for aemif - * subdevices. - * @cs_offset: Lowest allowed chip-select number. - * @abus_data: Array of async bus configuration entries. - * @num_abus_data: Number of abus entries. - * @sub_devices: Array of platform subdevices. - * @num_sub_devices: Number of subdevices. - */ -struct aemif_platform_data { - struct of_dev_auxdata *dev_lookup; - u32 cs_offset; - struct aemif_abus_data *abus_data; - size_t num_abus_data; - struct platform_device *sub_devices; - size_t num_sub_devices; -}; - -#endif /* __TI_DAVINCI_AEMIF_DATA_H__ */ diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 0aeeae1c1943..365e119bebaa 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -4,6 +4,7 @@ #include <linux/errno.h> #include <linux/types.h> +#include <linux/dmi.h> /* WMI Methods */ #define ASUS_WMI_METHODID_SPEC 0x43455053 /* BIOS SPECification */ @@ -62,12 +63,14 @@ #define ASUS_WMI_DEVID_KBD_BACKLIGHT 0x00050021 #define ASUS_WMI_DEVID_LIGHT_SENSOR 0x00050022 /* ?? */ #define ASUS_WMI_DEVID_LIGHTBAR 0x00050025 +#define ASUS_WMI_DEVID_OOBE 0x0005002F /* This can only be used to disable the screen, not re-enable */ #define ASUS_WMI_DEVID_SCREENPAD_POWER 0x00050031 /* Writing a brightness re-enables the screen if disabled */ #define ASUS_WMI_DEVID_SCREENPAD_LIGHT 0x00050032 #define ASUS_WMI_DEVID_FAN_BOOST_MODE 0x00110018 #define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY 0x00120075 +#define ASUS_WMI_DEVID_THROTTLE_THERMAL_POLICY_VIVO 0x00110019 /* Misc */ #define ASUS_WMI_DEVID_PANEL_OD 0x00050019 @@ -164,4 +167,39 @@ static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, } #endif +/* To be used by both hid-asus and asus-wmi to determine which controls kbd_brightness */ +static const struct dmi_system_id asus_use_hid_led_dmi_ids[] = { + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Zephyrus"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Strix"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_PRODUCT_FAMILY, "ROG Flow"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GA403U"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "GU605M"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { }, +}; + #endif /* __PLATFORM_DATA_X86_ASUS_WMI_H */ diff --git a/include/linux/platform_data/intel-mid_wdt.h b/include/linux/platform_data/x86/intel-mid_wdt.h index 8dba70b4b020..e5c0210d0fec 100644 --- a/include/linux/platform_data/intel-mid_wdt.h +++ b/include/linux/platform_data/x86/intel-mid_wdt.h @@ -6,8 +6,8 @@ * Contact: David Cohen <david.a.cohen@linux.intel.com> */ -#ifndef __INTEL_MID_WDT_H__ -#define __INTEL_MID_WDT_H__ +#ifndef __PLATFORM_X86_INTEL_MID_WDT_H_ +#define __PLATFORM_X86_INTEL_MID_WDT_H_ #include <linux/platform_device.h> @@ -16,4 +16,4 @@ struct intel_mid_wdt_pdata { int (*probe)(struct platform_device *pdev); }; -#endif /*__INTEL_MID_WDT_H__*/ +#endif /* __PLATFORM_X86_INTEL_MID_WDT_H_ */ diff --git a/include/linux/platform_data/x86/intel_scu_ipc.h b/include/linux/platform_data/x86/intel_scu_ipc.h new file mode 100644 index 000000000000..b287627759f7 --- /dev/null +++ b/include/linux/platform_data/x86/intel_scu_ipc.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PLATFORM_X86_INTEL_SCU_IPC_H_ +#define __PLATFORM_X86_INTEL_SCU_IPC_H_ + +#include <linux/init.h> +#include <linux/ioport.h> +#include <linux/types.h> + +struct device; +struct module; + +struct intel_scu_ipc_dev; + +/** + * struct intel_scu_ipc_data - Data used to configure SCU IPC + * @mem: Base address of SCU IPC MMIO registers + * @irq: The IRQ number used for SCU (optional) + */ +struct intel_scu_ipc_data { + struct resource mem; + int irq; +}; + +struct intel_scu_ipc_dev * +__intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define intel_scu_ipc_register(parent, scu_data) \ + __intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu); + +struct intel_scu_ipc_dev * +__devm_intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define devm_intel_scu_ipc_register(parent, scu_data) \ + __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void); +void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu); +struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev); + +int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 *data); +int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data); +int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); +int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); + +int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data, u8 mask); + +int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub); +int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + size_t size, void *out, size_t outlen); + +static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + void *out, size_t outlen) +{ + return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen, + inlen, out, outlen); +} + +#endif diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index d422db6eec63..074754c23d33 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -52,7 +52,7 @@ struct platform_device { extern int platform_device_register(struct platform_device *); extern void platform_device_unregister(struct platform_device *); -extern struct bus_type platform_bus_type; +extern const struct bus_type platform_bus_type; extern struct device platform_bus; extern struct resource *platform_get_resource(struct platform_device *, @@ -235,17 +235,7 @@ extern void platform_device_put(struct platform_device *pdev); struct platform_driver { int (*probe)(struct platform_device *); - - /* - * .remove_new() is a relic from a prototype conversion of .remove(). - * New drivers are supposed to implement .remove(). Once all drivers are - * converted to not use .remove_new any more, it will be dropped. - */ - union { - void (*remove)(struct platform_device *); - void (*remove_new)(struct platform_device *); - }; - + void (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); int (*resume)(struct platform_device *); diff --git a/include/linux/pm.h b/include/linux/pm.h index 97b0e23363c8..e7f0260f15ad 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -385,7 +385,7 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns) +#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns") #else #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) #define EXPORT_PM_FN_GPL(name) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 858c8e7851fb..45646bfcaf1a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -30,9 +30,16 @@ * supplier and its PM domain when creating the * device-links. * + * PD_FLAG_REQUIRED_OPP: Assign required_devs for the required OPPs. The + * index of the required OPP must correspond to the + * index in the array of the pd_names. If pd_names + * isn't specified, the index just follows the + * index for the attached PM domain. + * */ #define PD_FLAG_NO_DEV_LINK BIT(0) #define PD_FLAG_DEV_LINK_ON BIT(1) +#define PD_FLAG_REQUIRED_OPP BIT(2) struct dev_pm_domain_attach_data { const char * const *pd_names; @@ -43,6 +50,7 @@ struct dev_pm_domain_attach_data { struct dev_pm_domain_list { struct device **pd_devs; struct device_link **pd_links; + u32 *opp_tokens; u32 num_pds; }; @@ -92,6 +100,10 @@ struct dev_pm_domain_list { * GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states, * but its corresponding OPP tables are not * described in DT, but are given directly by FW. + * + * GENPD_FLAG_DEV_NAME_FW: Instructs genpd to generate an unique device name + * using ida. It is used by genpd providers which + * get their genpd-names directly from FW. */ #define GENPD_FLAG_PM_CLK (1U << 0) #define GENPD_FLAG_IRQ_SAFE (1U << 1) @@ -101,6 +113,7 @@ struct dev_pm_domain_list { #define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5) #define GENPD_FLAG_MIN_RESIDENCY (1U << 6) #define GENPD_FLAG_OPP_TABLE_FW (1U << 7) +#define GENPD_FLAG_DEV_NAME_FW (1U << 8) enum gpd_status { GENPD_STATE_ON = 0, /* PM domain is on */ @@ -163,6 +176,7 @@ struct generic_pm_domain { atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ unsigned int device_count; /* Number of devices */ + unsigned int device_id; /* unique device id */ unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ @@ -198,8 +212,11 @@ struct generic_pm_domain { spinlock_t slock; unsigned long lock_flags; }; + struct { + raw_spinlock_t raw_slock; + unsigned long raw_lock_flags; + }; }; - }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -241,6 +258,7 @@ struct generic_pm_domain_data { unsigned int performance_state; unsigned int default_pstate; unsigned int rpm_pstate; + unsigned int opp_token; bool hw_mode; void *data; }; @@ -473,6 +491,9 @@ struct device *dev_pm_domain_attach_by_name(struct device *dev, int dev_pm_domain_attach_list(struct device *dev, const struct dev_pm_domain_attach_data *data, struct dev_pm_domain_list **list); +int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list); void dev_pm_domain_detach(struct device *dev, bool power_off); void dev_pm_domain_detach_list(struct dev_pm_domain_list *list); int dev_pm_domain_start(struct device *dev); @@ -499,6 +520,14 @@ static inline int dev_pm_domain_attach_list(struct device *dev, { return 0; } + +static inline int devm_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + return 0; +} + static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {} static inline int dev_pm_domain_start(struct device *dev) diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 6424692c30b7..568183e3e641 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -62,11 +62,8 @@ typedef int (*config_clks_t)(struct device *dev, struct opp_table *opp_table, * @supported_hw: Array of hierarchy of versions to match. * @supported_hw_count: Number of elements in the array. * @regulator_names: Array of pointers to the names of the regulator, NULL terminated. - * @genpd_names: Null terminated array of pointers containing names of genpd to - * attach. Mutually exclusive with required_devs. - * @virt_devs: Pointer to return the array of genpd virtual devices. Mutually - * exclusive with required_devs. - * @required_devs: Required OPP devices. Mutually exclusive with genpd_names/virt_devs. + * @required_dev: The required OPP device. + * @required_dev_index: The index of the required OPP for the @required_dev. * * This structure contains platform specific OPP configurations for the device. */ @@ -79,9 +76,8 @@ struct dev_pm_opp_config { const unsigned int *supported_hw; unsigned int supported_hw_count; const char * const *regulator_names; - const char * const *genpd_names; - struct device ***virt_devs; - struct device **required_devs; + struct device *required_dev; + unsigned int required_dev_index; }; #define OPP_LEVEL_UNSET U32_MAX @@ -675,36 +671,6 @@ static inline void dev_pm_opp_put_config_regulators(int token) dev_pm_opp_clear_config(token); } -/* genpd helpers */ -static inline int dev_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - struct dev_pm_opp_config config = { - .genpd_names = names, - .virt_devs = virt_devs, - }; - - return dev_pm_opp_set_config(dev, &config); -} - -static inline void dev_pm_opp_detach_genpd(int token) -{ - dev_pm_opp_clear_config(token); -} - -static inline int devm_pm_opp_attach_genpd(struct device *dev, - const char * const *names, - struct device ***virt_devs) -{ - struct dev_pm_opp_config config = { - .genpd_names = names, - .virt_devs = virt_devs, - }; - - return devm_pm_opp_set_config(dev, &config); -} - /* prop-name helpers */ static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name) { diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 76cd1f9f1365..222f7530806c 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -10,7 +10,7 @@ #define _LINUX_PM_WAKEUP_H #ifndef _DEVICE_H_ -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include <linux/types.h> diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dc7b738de299..f11f10c97bd9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,12 +5,16 @@ #include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> +#include <linux/pid.h> #include <linux/posix-timers_types.h> +#include <linux/rcuref.h> #include <linux/spinlock.h> #include <linux/timerqueue.h> struct kernel_siginfo; struct task_struct; +struct sigqueue; +struct k_itimer; static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) @@ -35,6 +39,8 @@ static inline int clockid_to_fd(const clockid_t clk) #ifdef CONFIG_POSIX_TIMERS +#include <linux/signal_types.h> + /** * cpu_timer - Posix CPU timer representation for k_itimer * @node: timerqueue node to queue in the task/sig @@ -42,6 +48,7 @@ static inline int clockid_to_fd(const clockid_t clk) * @pid: Pointer to target task PID * @elist: List head for the expiry list * @firing: Timer is currently firing + * @nanosleep: Timer is used for nanosleep and is not a regular posix-timer * @handling: Pointer to the task which handles expiry */ struct cpu_timer { @@ -49,7 +56,8 @@ struct cpu_timer { struct timerqueue_head *head; struct pid *pid; struct list_head elist; - int firing; + bool firing; + bool nanosleep; struct task_struct __rcu *handling; }; @@ -101,6 +109,12 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct, pct->bases[CPUCLOCK_SCHED].nextevt = runtime; } +void posixtimer_rearm_itimer(struct task_struct *p); +bool posixtimer_init_sigqueue(struct sigqueue *q); +void posixtimer_send_sigqueue(struct k_itimer *tmr); +bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); +void posixtimer_free_timer(struct k_itimer *timer); + /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ .nextevt = U64_MAX, \ @@ -122,6 +136,10 @@ struct cpu_timer { }; static inline void posix_cputimers_init(struct posix_cputimers *pct) { } static inline void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit) { } +static inline void posixtimer_rearm_itimer(struct task_struct *p) { } +static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info, + struct sigqueue *timer_sigq) { return false; } +static inline void posixtimer_free_timer(struct k_itimer *timer) { } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK @@ -132,50 +150,56 @@ static inline void clear_posix_cputimers_work(struct task_struct *p) { } static inline void posix_cputimers_init_work(void) { } #endif -#define REQUEUE_PENDING 1 - /** * struct k_itimer - POSIX.1b interval timer structure. - * @list: List head for binding the timer to signals->posix_timers + * @list: List node for binding the timer to tsk::signal::posix_timers + * @ignored_list: List node for tracking ignored timers in tsk::signal::ignored_posix_timers * @t_hash: Entry in the posix timer hash table * @it_lock: Lock protecting the timer * @kclock: Pointer to the k_clock struct handling this timer * @it_clock: The posix timer clock id * @it_id: The posix timer id for identifying the timer - * @it_active: Marker that timer is active + * @it_status: The status of the timer + * @it_sig_periodic: The periodic status at signal delivery * @it_overrun: The overrun counter for pending signals * @it_overrun_last: The overrun at the time of the last delivered signal - * @it_requeue_pending: Indicator that timer waits for being requeued on - * signal delivery + * @it_signal_seq: Sequence count to control signal delivery + * @it_sigqueue_seq: The sequence count at the point where the signal was queued * @it_sigev_notify: The notify word of sigevent struct for signal delivery * @it_interval: The interval for periodic timers * @it_signal: Pointer to the creators signal struct * @it_pid: The pid of the process/task targeted by the signal * @it_process: The task to wakeup on clock_nanosleep (CPU timers) - * @sigq: Pointer to preallocated sigqueue + * @rcuref: Reference count for life time management + * @sigq: Embedded sigqueue * @it: Union representing the various posix timer type * internals. * @rcu: RCU head for freeing the timer. */ struct k_itimer { - struct list_head list; + struct hlist_node list; + struct hlist_node ignored_list; struct hlist_node t_hash; spinlock_t it_lock; const struct k_clock *kclock; clockid_t it_clock; timer_t it_id; - int it_active; + int it_status; + bool it_sig_periodic; s64 it_overrun; s64 it_overrun_last; - int it_requeue_pending; + unsigned int it_signal_seq; + unsigned int it_sigqueue_seq; int it_sigev_notify; + enum pid_type it_pid_type; ktime_t it_interval; struct signal_struct *it_signal; union { struct pid *it_pid; struct task_struct *it_process; }; - struct sigqueue *sigq; + struct sigqueue sigq; + rcuref_t rcuref; union { struct { struct hrtimer timer; @@ -196,5 +220,29 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new); -void posixtimer_rearm(struct kernel_siginfo *info); +#ifdef CONFIG_POSIX_TIMERS +static inline void posixtimer_putref(struct k_itimer *tmr) +{ + if (rcuref_put(&tmr->rcuref)) + posixtimer_free_timer(tmr); +} + +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) +{ + struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); + + WARN_ON_ONCE(!rcuref_get(&tmr->rcuref)); +} + +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) +{ + struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); + + posixtimer_putref(tmr); +} +#else /* CONFIG_POSIX_TIMERS */ +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { } +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { } +#endif /* !CONFIG_POSIX_TIMERS */ + #endif diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 0e65b3d634d9..e2d47eb1a7f3 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -28,9 +28,9 @@ struct posix_acl_entry { struct posix_acl { refcount_t a_refcount; - struct rcu_head a_rcu; unsigned int a_count; - struct posix_acl_entry a_entries[]; + struct rcu_head a_rcu; + struct posix_acl_entry a_entries[] __counted_by(a_count); }; #define FOREACH_ACL_ENTRY(pa, acl, pe) \ @@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl) /* posix_acl.c */ extern void posix_acl_init(struct posix_acl *, int); -extern struct posix_acl *posix_acl_alloc(int, gfp_t); +extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags); extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t); extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *); extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 72dc7e45c90c..b98106e1a90f 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -236,6 +236,8 @@ struct power_supply_config { char **supplied_to; size_t num_supplicants; + + bool no_wakeup_source; }; /* Description of power supply */ @@ -243,8 +245,7 @@ struct power_supply_desc { const char *name; enum power_supply_type type; u8 charge_behaviours; - const enum power_supply_usb_type *usb_types; - size_t num_usb_types; + u32 usb_types; const enum power_supply_property *properties; size_t num_properties; @@ -751,9 +752,9 @@ struct power_supply_battery_info { int temp_alert_max; int temp_min; int temp_max; - struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; + const struct power_supply_battery_ocv_table *ocv_table[POWER_SUPPLY_OCV_TEMP_MAX]; int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; - struct power_supply_resistance_temp_table *resist_table; + const struct power_supply_resistance_temp_table *resist_table; int resist_table_size; const struct power_supply_vbat_ri_table *vbat2ri_discharging; int vbat2ri_discharging_size; @@ -798,15 +799,15 @@ extern bool power_supply_battery_info_has_prop(struct power_supply_battery_info extern int power_supply_battery_info_get_prop(struct power_supply_battery_info *info, enum power_supply_property psp, union power_supply_propval *val); -extern int power_supply_ocv2cap_simple(struct power_supply_battery_ocv_table *table, +extern int power_supply_ocv2cap_simple(const struct power_supply_battery_ocv_table *table, int table_len, int ocv); -extern struct power_supply_battery_ocv_table * +extern const struct power_supply_battery_ocv_table * power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, int temp, int *table_len); extern int power_supply_batinfo_ocv2cap(struct power_supply_battery_info *info, int ocv, int temp); extern int -power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table, +power_supply_temp2resist_simple(const struct power_supply_resistance_temp_table *table, int table_len, int temp); extern int power_supply_vbat2ri(struct power_supply_battery_info *info, int vbat_uv, bool charging); @@ -864,8 +865,6 @@ static inline int power_supply_set_property(struct power_supply *psy, const union power_supply_propval *val) { return 0; } #endif -extern int power_supply_property_is_writeable(struct power_supply *psy, - enum power_supply_property psp); extern void power_supply_external_power_changed(struct power_supply *psy); extern struct power_supply *__must_check @@ -873,17 +872,9 @@ power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg); extern struct power_supply *__must_check -power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg); -extern struct power_supply *__must_check devm_power_supply_register(struct device *parent, const struct power_supply_desc *desc, const struct power_supply_config *cfg); -extern struct power_supply *__must_check -devm_power_supply_register_no_ws(struct device *parent, - const struct power_supply_desc *desc, - const struct power_supply_config *cfg); extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); @@ -946,19 +937,6 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) return false; } -#ifdef CONFIG_POWER_SUPPLY_HWMON -int power_supply_add_hwmon_sysfs(struct power_supply *psy); -void power_supply_remove_hwmon_sysfs(struct power_supply *psy); -#else -static inline int power_supply_add_hwmon_sysfs(struct power_supply *psy) -{ - return 0; -} - -static inline -void power_supply_remove_hwmon_sysfs(struct power_supply *psy) {} -#endif - #ifdef CONFIG_SYSFS ssize_t power_supply_charge_behaviour_show(struct device *dev, unsigned int available_behaviours, diff --git a/include/linux/prandom.h b/include/linux/prandom.h index f7f1e5251c67..f2ed5b72b3d6 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/once.h> +#include <linux/percpu.h> #include <linux/random.h> struct rnd_state { diff --git a/include/linux/preempt.h b/include/linux/preempt.h index ce76f1a45722..ca86235ac15c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) extern bool preempt_model_none(void); extern bool preempt_model_voluntary(void); extern bool preempt_model_full(void); +extern bool preempt_model_lazy(void); #else @@ -502,6 +503,11 @@ static inline bool preempt_model_full(void) return IS_ENABLED(CONFIG_PREEMPT); } +static inline bool preempt_model_lazy(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_LAZY); +} + #endif static inline bool preempt_model_rt(void) @@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void) */ static inline bool preempt_model_preemptible(void) { - return preempt_model_full() || preempt_model_rt(); + return preempt_model_full() || preempt_model_lazy() || preempt_model_rt(); } #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index b937cefcb31c..4217a9f412b2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -9,6 +9,8 @@ #include <linux/ratelimit_types.h> #include <linux/once_lite.h> +struct console; + extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -161,15 +163,19 @@ int _printk(const char *fmt, ...); */ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); -extern void __printk_safe_enter(void); -extern void __printk_safe_exit(void); +extern void __printk_deferred_enter(void); +extern void __printk_deferred_exit(void); + +extern void printk_force_console_enter(void); +extern void printk_force_console_exit(void); + /* * The printk_deferred_enter/exit macros are available only as a hack for * some code paths that need to defer all printk console printing. Interrupts * must be disabled for the deferred duration. */ -#define printk_deferred_enter __printk_safe_enter -#define printk_deferred_exit __printk_safe_exit +#define printk_deferred_enter() __printk_deferred_enter() +#define printk_deferred_exit() __printk_deferred_exit() /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -197,6 +203,10 @@ extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); void console_try_replay_all(void); +void printk_legacy_allow_panic_sync(void); +extern bool nbcon_device_try_acquire(struct console *con); +extern void nbcon_device_release(struct console *con); +void nbcon_atomic_flush_unsafe(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -222,6 +232,14 @@ static inline void printk_deferred_exit(void) { } +static inline void printk_force_console_enter(void) +{ +} + +static inline void printk_force_console_exit(void) +{ +} + static inline int printk_ratelimit(void) { return 0; @@ -279,6 +297,24 @@ static inline void printk_trigger_flush(void) static inline void console_try_replay_all(void) { } + +static inline void printk_legacy_allow_panic_sync(void) +{ +} + +static inline bool nbcon_device_try_acquire(struct console *con) +{ + return false; +} + +static inline void nbcon_device_release(struct console *con) +{ +} + +static inline void nbcon_atomic_flush_unsafe(void) +{ +} + #endif bool this_cpu_in_panic(void); diff --git a/include/linux/prmt.h b/include/linux/prmt.h index 24da8364b919..9c094294403f 100644 --- a/include/linux/prmt.h +++ b/include/linux/prmt.h @@ -2,6 +2,11 @@ #ifdef CONFIG_ACPI_PRMT void init_prmt(void); +int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer); #else static inline void init_prmt(void) { } +static inline int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer) +{ + return -EOPNOTSUPP; +} #endif diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 638507a3c8ff..fed601053c51 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -182,7 +182,7 @@ struct pstore_info { struct module *owner; const char *name; - spinlock_t buf_lock; + raw_spinlock_t buf_lock; char *buf; size_t bufsize; diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 1b5a953c6bbc..3a74f69e0b59 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -10,7 +10,7 @@ #ifndef _PTP_CLASSIFY_H_ #define _PTP_CLASSIFY_H_ -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/ip.h> #include <linux/ktime.h> #include <linux/skbuff.h> diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6e4b8206c7d0..c892d22ce0a7 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -47,10 +47,12 @@ struct system_device_crosststamp; * struct ptp_system_timestamp - system time corresponding to a PHC timestamp * @pre_ts: system timestamp before capturing PHC * @post_ts: system timestamp after capturing PHC + * @clockid: clock-base used for capturing the system timestamps */ struct ptp_system_timestamp { struct timespec64 pre_ts; struct timespec64 post_ts; + clockid_t clockid; }; /** @@ -457,14 +459,40 @@ static inline ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, static inline void ptp_read_system_prets(struct ptp_system_timestamp *sts) { - if (sts) - ktime_get_real_ts64(&sts->pre_ts); + if (sts) { + switch (sts->clockid) { + case CLOCK_REALTIME: + ktime_get_real_ts64(&sts->pre_ts); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(&sts->pre_ts); + break; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(&sts->pre_ts); + break; + default: + break; + } + } } static inline void ptp_read_system_postts(struct ptp_system_timestamp *sts) { - if (sts) - ktime_get_real_ts64(&sts->post_ts); + if (sts) { + switch (sts->clockid) { + case CLOCK_REALTIME: + ktime_get_real_ts64(&sts->post_ts); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(&sts->post_ts); + break; + case CLOCK_MONOTONIC_RAW: + ktime_get_raw_ts64(&sts->post_ts); + break; + default: + break; + } + } } #endif diff --git a/include/linux/pwm.h b/include/linux/pwm.h index f8c2dc12dbd3..6853e29d9674 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -8,7 +8,7 @@ #include <linux/mutex.h> #include <linux/of.h> -MODULE_IMPORT_NS(PWM); +MODULE_IMPORT_NS("PWM"); struct pwm_chip; @@ -49,6 +49,31 @@ enum { PWMF_EXPORTED = 1, }; +/** + * struct pwm_waveform - description of a PWM waveform + * @period_length_ns: PWM period + * @duty_length_ns: PWM duty cycle + * @duty_offset_ns: offset of the rising edge from the period's start + * + * This is a representation of a PWM waveform alternative to struct pwm_state + * below. It's more expressive than struct pwm_state as it contains a + * duty_offset_ns and so can represent offsets other than zero (with .polarity = + * PWM_POLARITY_NORMAL) and period - duty_cycle (.polarity = + * PWM_POLARITY_INVERSED). + * + * Note there is no explicit bool for enabled. A "disabled" PWM is represented + * by .period_length_ns = 0. Note further that the behaviour of a "disabled" PWM + * is undefined. Depending on the hardware's capabilities it might drive the + * active or inactive level, go high-z or even continue to toggle. + * + * The unit for all three members is nanoseconds. + */ +struct pwm_waveform { + u64 period_length_ns; + u64 duty_length_ns; + u64 duty_offset_ns; +}; + /* * struct pwm_state - state of a PWM channel * @period: PWM period (in nanoseconds) @@ -251,6 +276,11 @@ struct pwm_capture { * @request: optional hook for requesting a PWM * @free: optional hook for freeing a PWM * @capture: capture and report PWM signal + * @sizeof_wfhw: size (in bytes) of driver specific waveform presentation + * @round_waveform_tohw: convert a struct pwm_waveform to driver specific presentation + * @round_waveform_fromhw: convert a driver specific waveform presentation to struct pwm_waveform + * @read_waveform: read driver specific waveform presentation from hardware + * @write_waveform: write driver specific waveform presentation to hardware * @apply: atomically apply a new PWM config * @get_state: get the current PWM state. */ @@ -259,6 +289,17 @@ struct pwm_ops { void (*free)(struct pwm_chip *chip, struct pwm_device *pwm); int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); + + size_t sizeof_wfhw; + int (*round_waveform_tohw)(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_waveform *wf, void *wfhw); + int (*round_waveform_fromhw)(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw, struct pwm_waveform *wf); + int (*read_waveform)(struct pwm_chip *chip, struct pwm_device *pwm, + void *wfhw); + int (*write_waveform)(struct pwm_chip *chip, struct pwm_device *pwm, + const void *wfhw); + int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm, const struct pwm_state *state); int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm, @@ -275,6 +316,9 @@ struct pwm_ops { * @of_xlate: request a PWM device given a device tree PWM specifier * @atomic: can the driver's ->apply() be called in atomic context * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip + * @operational: signals if the chip can be used (or is already deregistered) + * @nonatomic_lock: mutex for nonatomic chips + * @atomic_lock: mutex for atomic chips * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { @@ -290,6 +334,16 @@ struct pwm_chip { /* only used internally by the PWM framework */ bool uses_pwmchip_alloc; + bool operational; + union { + /* + * depending on the chip being atomic or not either the mutex or + * the spinlock is used. It protects .operational and + * synchronizes the callbacks in .ops + */ + struct mutex nonatomic_lock; + spinlock_t atomic_lock; + }; struct pwm_device pwms[] __counted_by(npwm); }; @@ -309,9 +363,14 @@ static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) } #if IS_ENABLED(CONFIG_PWM) -/* PWM user APIs */ + +/* PWM consumer APIs */ +int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); +int pwm_get_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); +int pwm_set_waveform_might_sleep(struct pwm_device *pwm, const struct pwm_waveform *wf, bool exact); int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); int pwm_apply_atomic(struct pwm_device *pwm, const struct pwm_state *state); +int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state); int pwm_adjust_config(struct pwm_device *pwm); /** @@ -394,9 +453,6 @@ static inline bool pwm_might_sleep(struct pwm_device *pwm) } /* PWM provider APIs */ -int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, - unsigned long timeout); - void pwmchip_put(struct pwm_chip *chip); struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); @@ -439,6 +495,11 @@ static inline int pwm_apply_atomic(struct pwm_device *pwm, return -EOPNOTSUPP; } +static inline int pwm_get_state_hw(struct pwm_device *pwm, struct pwm_state *state) +{ + return -EOPNOTSUPP; +} + static inline int pwm_adjust_config(struct pwm_device *pwm) { return -EOPNOTSUPP; @@ -462,13 +523,6 @@ static inline void pwm_disable(struct pwm_device *pwm) might_sleep(); } -static inline int pwm_capture(struct pwm_device *pwm, - struct pwm_capture *result, - unsigned long timeout) -{ - return -EINVAL; -} - static inline void pwmchip_put(struct pwm_chip *chip) { } diff --git a/include/linux/quota.h b/include/linux/quota.h index 07071e64abf3..89a0d83ddad0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -526,7 +526,7 @@ struct quota_info { const struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */ }; -int register_quota_format(struct quota_format_type *fmt); +void register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); struct quota_module_name { diff --git a/include/linux/random.h b/include/linux/random.h index b0a940af4fff..333cecfca93f 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -145,13 +145,6 @@ declare_get_random_var_wait(u64, u32) declare_get_random_var_wait(long, unsigned long) #undef declare_get_random_var -/* - * This is designed to be standalone for just prandom - * users, but for now we include it from <linux/random.h> - * for legacy reasons. - */ -#include <linux/prandom.h> - #ifdef CONFIG_SMP int random_prepare_cpu(unsigned int cpu); int random_online_cpu(unsigned int cpu); diff --git a/include/linux/range.h b/include/linux/range.h index 6ad0b73cb7ad..d7f98e1285d7 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,11 +13,20 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } -static inline bool range_contains(struct range *r1, struct range *r2) +/* True if r1 completely contains r2 */ +static inline bool range_contains(const struct range *r1, + const struct range *r2) { return r1->start <= r2->start && r1->end >= r2->end; } +/* True if any part of r1 overlaps r2 */ +static inline bool range_overlaps(const struct range *r1, + const struct range *r2) +{ + return r1->start <= r2->end && r1->end >= r2->start; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); @@ -31,4 +40,10 @@ int clean_sort_range(struct range *range, int az); void sort_range(struct range *range, int nr_range); +#define DEFINE_RANGE(_start, _end) \ +(struct range) { \ + .start = (_start), \ + .end = (_end), \ + } + #endif diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 002266693e50..765232ce0b5e 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -19,8 +19,8 @@ struct ratelimit_state { int burst; int printed; int missed; + unsigned int flags; unsigned long begin; - unsigned long flags; }; #define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \ diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index f7edca369eda..7c173aa64e1e 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -245,6 +245,42 @@ rb_find_add(struct rb_node *node, struct rb_root *tree, } /** + * rb_find_add_rcu() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Adds a Store-Release for link_node. + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add_rcu(struct rb_node *node, struct rb_root *tree, + int (*cmp)(struct rb_node *, const struct rb_node *)) +{ + struct rb_node **link = &tree->rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) + link = &parent->rb_left; + else if (c > 0) + link = &parent->rb_right; + else + return parent; + } + + rb_link_node_rcu(node, parent, link); + rb_insert_color(node, tree); + return NULL; +} + +/** * rb_find() - find @key in tree @tree * @key: key to match * @tree: tree to search @@ -273,6 +309,37 @@ rb_find(const void *key, const struct rb_root *tree, } /** + * rb_find_rcu() - find @key in tree @tree + * @key: key to match + * @tree: tree to search + * @cmp: operator defining the node order + * + * Notably, tree descent vs concurrent tree rotations is unsound and can result + * in false-negatives. + * + * Returns the rb_node matching @key or NULL. + */ +static __always_inline struct rb_node * +rb_find_rcu(const void *key, const struct rb_root *tree, + int (*cmp)(const void *key, const struct rb_node *)) +{ + struct rb_node *node = tree->rb_node; + + while (node) { + int c = cmp(key, node); + + if (c < 0) + node = rcu_dereference_raw(node->rb_left); + else if (c > 0) + node = rcu_dereference_raw(node->rb_right); + else + return node; + } + + return NULL; +} + +/** * rb_find_first() - find the first @key in @tree * @key: key to match * @tree: tree to search diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index 6a0999c26c7c..2f630eb8307e 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h @@ -14,7 +14,7 @@ * * If we need to allow unconditional lookups (say as required for NMI context * usage) we need a more complex setup; this data structure provides this by - * employing the latch technique -- see @raw_write_seqcount_latch -- to + * employing the latch technique -- see @write_seqcount_latch_begin -- to * implement a latched RB-tree which does allow for unconditional lookups by * virtue of always having (at least) one stable copy of the tree. * @@ -132,7 +132,7 @@ __lt_find(void *key, struct latch_tree_root *ltr, int idx, * @ops: operators defining the node order * * It inserts @node into @root in an ordered fashion such that we can always - * observe one complete tree. See the comment for raw_write_seqcount_latch(). + * observe one complete tree. See the comment for write_seqcount_latch_begin(). * * The inserts use rcu_assign_pointer() to publish the element such that the * tree structure is stored before we can observe the new @node. @@ -145,10 +145,11 @@ latch_tree_insert(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch_begin(&root->seq); __lt_insert(node, root, 0, ops->less); - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch(&root->seq); __lt_insert(node, root, 1, ops->less); + write_seqcount_latch_end(&root->seq); } /** @@ -159,7 +160,7 @@ latch_tree_insert(struct latch_tree_node *node, * * Removes @node from the trees @root in an ordered fashion such that we can * always observe one complete tree. See the comment for - * raw_write_seqcount_latch(). + * write_seqcount_latch_begin(). * * It is assumed that @node will observe one RCU quiescent state before being * reused of freed. @@ -172,10 +173,11 @@ latch_tree_erase(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch_begin(&root->seq); __lt_erase(node, root, 0); - raw_write_seqcount_latch(&root->seq); + write_seqcount_latch(&root->seq); __lt_erase(node, root, 1); + write_seqcount_latch_end(&root->seq); } /** @@ -204,9 +206,9 @@ latch_tree_find(void *key, struct latch_tree_root *root, unsigned int seq; do { - seq = raw_read_seqcount_latch(&root->seq); + seq = read_seqcount_latch(&root->seq); node = __lt_find(key, root, seq & 1, ops->comp); - } while (raw_read_seqcount_latch_retry(&root->seq, seq)); + } while (read_seqcount_latch_retry(&root->seq, seq)); return node; } diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index ba95c06675e1..2fdc2208f1ca 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -185,11 +185,7 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- */ #define SEGCBLIST_ENABLED BIT(0) -#define SEGCBLIST_RCU_CORE BIT(1) -#define SEGCBLIST_LOCKING BIT(2) -#define SEGCBLIST_KTHREAD_CB BIT(3) -#define SEGCBLIST_KTHREAD_GP BIT(4) -#define SEGCBLIST_OFFLOADED BIT(5) +#define SEGCBLIST_OFFLOADED BIT(1) struct rcu_segcblist { struct rcu_head *head; diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3dc1e58865f7..14dfa6008467 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -191,7 +191,10 @@ static inline void hlist_del_init_rcu(struct hlist_node *n) * @old : the element to be replaced * @new : the new element to insert * - * The @old entry will be replaced with the @new entry atomically. + * The @old entry will be replaced with the @new entry atomically from + * the perspective of concurrent readers. It is the caller's responsibility + * to synchronize with concurrent updaters, if any. + * * Note: @old should not be empty. */ static inline void list_replace_rcu(struct list_head *old, @@ -519,7 +522,9 @@ static inline void hlist_del_rcu(struct hlist_node *n) * @old : the element to be replaced * @new : the new element to insert * - * The @old entry will be replaced with the @new entry atomically. + * The @old entry will be replaced with the @new entry atomically from + * the perspective of concurrent readers. It is the caller's responsibility + * to synchronize with concurrent updaters, if any. */ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *new) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13f6f00aecf9..48e5c03df1dd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -34,10 +34,12 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define RCU_SEQ_CTR_SHIFT 2 +#define RCU_SEQ_STATE_MASK ((1 << RCU_SEQ_CTR_SHIFT) - 1) + /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); void rcu_barrier_tasks(void); -void rcu_barrier_tasks_rude(void); void synchronize_rcu(void); struct rcu_gp_oldstate; @@ -144,11 +146,18 @@ void rcu_init_nohz(void); int rcu_nocb_cpu_offload(int cpu); int rcu_nocb_cpu_deoffload(int cpu); void rcu_nocb_flush_deferred_wakeup(void); + +#define RCU_NOCB_LOCKDEP_WARN(c, s) RCU_LOCKDEP_WARN(c, s) + #else /* #ifdef CONFIG_RCU_NOCB_CPU */ + static inline void rcu_init_nohz(void) { } static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; } static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; } static inline void rcu_nocb_flush_deferred_wakeup(void) { } + +#define RCU_NOCB_LOCKDEP_WARN(c, s) + #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* @@ -165,6 +174,7 @@ static inline void rcu_nocb_flush_deferred_wakeup(void) { } } while (0) void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); +void rcu_tasks_torture_stats_print(char *tt, char *tf); # else # define rcu_tasks_classic_qs(t, preempt) do { } while (0) # define call_rcu_tasks call_rcu @@ -191,6 +201,7 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); rcu_tasks_trace_qs_blkd(t); \ } \ } while (0) +void rcu_tasks_trace_torture_stats_print(char *tt, char *tf); # else # define rcu_tasks_trace_qs(t) do { } while (0) # endif @@ -202,8 +213,8 @@ do { \ } while (0) # ifdef CONFIG_TASKS_RUDE_RCU -void call_rcu_tasks_rude(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks_rude(void); +void rcu_tasks_rude_torture_stats_print(char *tt, char *tf); # endif #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) @@ -390,7 +401,7 @@ static inline int debug_lockdep_rcu_enabled(void) */ #define RCU_LOCKDEP_WARN(c, s) \ do { \ - static bool __section(".data.unlikely") __warned; \ + static bool __section(".data..unlikely") __warned; \ if (debug_lockdep_rcu_enabled() && (c) && \ debug_lockdep_rcu_enabled() && !__warned) { \ __warned = true; \ diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index eda493200663..e6c44eb428ab 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -10,6 +10,7 @@ #include <linux/sched.h> #include <linux/rcupdate.h> +#include <linux/cleanup.h> extern struct lockdep_map rcu_trace_lock_map; @@ -98,4 +99,8 @@ static inline void rcu_read_lock_trace(void) { BUG(); } static inline void rcu_read_unlock_trace(void) { BUG(); } #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ +DEFINE_LOCK_GUARD_0(rcu_tasks_trace, + rcu_read_lock_trace(), + rcu_read_unlock_trace()) + #endif /* __LINUX_RCUPDATE_TRACE_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d9ac7b136aea..fe42315f667f 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) kvfree(ptr); } +static inline void kvfree_rcu_barrier(void) +{ + rcu_barrier(); +} + #ifdef CONFIG_KASAN_GENERIC void kvfree_call_rcu(struct rcu_head *head, void *ptr); #else @@ -158,9 +163,8 @@ void rcu_scheduler_starting(void); static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } -static inline void rcu_momentary_dyntick_idle(void) { } +static inline void rcu_momentary_eqs(void) { } static inline void kfree_rcu_scheduler_running(void) { } -static inline bool rcu_gp_might_be_stalled(void) { return false; } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 254244202ea9..27d86d912781 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,11 +35,11 @@ static inline void rcu_virt_note_context_switch(void) void synchronize_rcu_expedited(void); void kvfree_call_rcu(struct rcu_head *head, void *ptr); +void kvfree_rcu_barrier(void); void rcu_barrier(void); -void rcu_momentary_dyntick_idle(void); +void rcu_momentary_eqs(void); void kfree_rcu_scheduler_running(void); -bool rcu_gp_might_be_stalled(void); struct rcu_gp_oldstate { unsigned long rgos_norm; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 122e38161acb..fd41baccbf3e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -54,7 +54,14 @@ struct sdw_slave; #define REGMAP_UPSHIFT(s) (-(s)) #define REGMAP_DOWNSHIFT(s) (s) -/* An enum of all the supported cache types */ +/* + * The supported cache types, the default is no cache. Any new caches + * should usually use the maple tree cache unless they specifically + * require that there are never any allocations at runtime and can't + * provide defaults in which case they should use the flat cache. The + * rbtree cache *may* have some performance advantage for very low end + * systems that make heavy use of cache syncs but is mainly legacy. + */ enum regcache_type { REGCACHE_NONE, REGCACHE_RBTREE, @@ -106,17 +113,17 @@ struct reg_sequence { * @addr: Address to poll * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read * error return value in case of a error read. In the two former cases, * the last read value at @addr is stored in @val. Must not be called * from atomic context if sleep_us or timeout_us are used. - * - * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. */ #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \ ({ \ @@ -133,20 +140,20 @@ struct reg_sequence { * @addr: Address to poll * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @delay_us: Time to udelay between reads in us (0 tight-loops). - * Should be less than ~10us since udelay is used - * (see Documentation/timers/timers-howto.rst). + * @delay_us: Time to udelay between reads in us (0 tight-loops). Please + * read udelay() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read - * error return value in case of a error read. In the two former cases, - * the last read value at @addr is stored in @val. - * * This is modelled after the readx_poll_timeout_atomic macros in linux/iopoll.h. * * Note: In general regmap cannot be used in atomic context. If you want to use * this macro then first setup your regmap for atomic use (flat or no cache * and MMIO regmap). + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read + * error return value in case of a error read. In the two former cases, + * the last read value at @addr is stored in @val. */ #define regmap_read_poll_timeout_atomic(map, addr, val, cond, delay_us, timeout_us) \ ({ \ @@ -177,17 +184,17 @@ struct reg_sequence { * @field: Regmap field to read from * @val: Unsigned integer variable to read the value into * @cond: Break condition (usually involving @val) - * @sleep_us: Maximum time to sleep between reads in us (0 - * tight-loops). Should be less than ~20ms since usleep_range - * is used (see Documentation/timers/timers-howto.rst). + * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please + * read usleep_range() function description for details and + * limitations. * @timeout_us: Timeout in us, 0 means never timeout * - * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + * + * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read * error return value in case of a error read. In the two former cases, * the last read value at @addr is stored in @val. Must not be called * from atomic context if sleep_us or timeout_us are used. - * - * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. */ #define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \ ({ \ @@ -1328,6 +1335,15 @@ static inline int regmap_clear_bits(struct regmap *map, return regmap_update_bits_base(map, reg, bits, 0, NULL, false, false); } +static inline int regmap_assign_bits(struct regmap *map, unsigned int reg, + unsigned int bits, bool value) +{ + if (value) + return regmap_set_bits(map, reg, bits); + else + return regmap_clear_bits(map, reg, bits); +} + int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits); /** @@ -1521,6 +1537,9 @@ struct regmap_irq_chip_data; * struct regmap_irq_chip - Description of a generic regmap irq_chip. * * @name: Descriptive name for IRQ controller. + * @domain_suffix: Name suffix to be appended to end of IRQ domain name. Needed + * when multiple regmap-IRQ controllers are created from same + * device. * * @main_status: Base main status register address. For chips which have * interrupts arranged in separate sub-irq blocks with own IRQ @@ -1606,6 +1625,7 @@ struct regmap_irq_chip_data; */ struct regmap_irq_chip { const char *name; + const char *domain_suffix; unsigned int main_status; unsigned int num_main_status_bits; @@ -1792,6 +1812,13 @@ static inline int regmap_clear_bits(struct regmap *map, return -EINVAL; } +static inline int regmap_assign_bits(struct regmap *map, unsigned int reg, + unsigned int bits, bool value) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits) { diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index d986ec13092e..8c3c372ad735 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -168,6 +168,29 @@ int devm_regulator_get_enable_read_voltage(struct device *dev, const char *id); void regulator_put(struct regulator *regulator); void devm_regulator_put(struct regulator *regulator); +#if IS_ENABLED(CONFIG_OF) +struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id); +struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id); +#else +static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} +#endif + int regulator_register_supply_alias(struct device *dev, const char *id, struct device *alias_dev, const char *alias_id); @@ -350,6 +373,20 @@ devm_regulator_get_optional(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct regulator *__must_check devm_of_regulator_get_optional(struct device *dev, + struct device_node *node, + const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline void regulator_put(struct regulator *regulator) { } @@ -452,6 +489,14 @@ static inline int of_regulator_bulk_get_all(struct device *dev, struct device_no return 0; } +static inline int devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers) +{ + return 0; +} + static inline int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index f230a472ccd3..5b66caf1695d 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -269,6 +269,11 @@ enum regulator_type { * config but it cannot store it for later usage. * Callback should return 0 on success or negative ERRNO * indicating failure. + * @init_cb: Optional callback called after the parsing of init_data. + * Allows the regulator to perform runtime init if necessary, + * such as synching the regulator and the parsed constraints. + * Callback should return 0 on success or negative ERRNO + * indicating failure. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. @@ -365,6 +370,8 @@ struct regulator_desc { int (*of_parse_cb)(struct device_node *, const struct regulator_desc *, struct regulator_config *); + int (*init_cb)(struct regulator_dev *, + struct regulator_config *); int id; unsigned int continuous_voltage_range:1; unsigned n_voltages; diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index 0cd76d264727..b3db09a7429b 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -273,8 +273,6 @@ struct regulator_consumer_supply { * be usable. * @num_consumer_supplies: Number of consumer device supplies. * @consumer_supplies: Consumer device supply configuration. - * - * @regulator_init: Callback invoked when the regulator has been registered. * @driver_data: Data passed to regulator_init. */ struct regulator_init_data { @@ -285,8 +283,7 @@ struct regulator_init_data { int num_consumer_supplies; struct regulator_consumer_supply *consumer_supplies; - /* optional regulator machine specific init */ - int (*regulator_init)(void *driver_data); + /* optional regulator machine specific data */ void *driver_data; /* core does not touch this */ }; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index b0875b99e811..d94abba1c716 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -248,6 +248,7 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); +u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); /* diff --git a/include/linux/reset.h b/include/linux/reset.h index 514ddf003efc..2986ced69a02 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -25,6 +25,48 @@ struct reset_control_bulk_data { struct reset_control *rstc; }; +#define RESET_CONTROL_FLAGS_BIT_SHARED BIT(0) /* not exclusive */ +#define RESET_CONTROL_FLAGS_BIT_OPTIONAL BIT(1) +#define RESET_CONTROL_FLAGS_BIT_ACQUIRED BIT(2) /* iff exclusive, not released */ +#define RESET_CONTROL_FLAGS_BIT_DEASSERTED BIT(3) + +/** + * enum reset_control_flags - Flags that can be passed to the reset_control_get functions + * to determine the type of reset control. + * These values cannot be OR'd. + * + * @RESET_CONTROL_EXCLUSIVE: exclusive, acquired, + * @RESET_CONTROL_EXCLUSIVE_DEASSERTED: exclusive, acquired, deasserted + * @RESET_CONTROL_EXCLUSIVE_RELEASED: exclusive, released, + * @RESET_CONTROL_SHARED: shared + * @RESET_CONTROL_SHARED_DEASSERTED: shared, deasserted + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE: optional, exclusive, acquired + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED: optional, exclusive, acquired, deasserted + * @RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED: optional, exclusive, released + * @RESET_CONTROL_OPTIONAL_SHARED: optional, shared + * @RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED: optional, shared, deasserted + */ +enum reset_control_flags { + RESET_CONTROL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_EXCLUSIVE_DEASSERTED = RESET_CONTROL_FLAGS_BIT_ACQUIRED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, + RESET_CONTROL_EXCLUSIVE_RELEASED = 0, + RESET_CONTROL_SHARED = RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_SHARED_DEASSERTED = RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_ACQUIRED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_ACQUIRED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED = RESET_CONTROL_FLAGS_BIT_OPTIONAL, + RESET_CONTROL_OPTIONAL_SHARED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_SHARED, + RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED = RESET_CONTROL_FLAGS_BIT_OPTIONAL | + RESET_CONTROL_FLAGS_BIT_SHARED | + RESET_CONTROL_FLAGS_BIT_DEASSERTED, +}; + #ifdef CONFIG_RESET_CONTROLLER int reset_control_reset(struct reset_control *rstc); @@ -42,30 +84,25 @@ int reset_control_bulk_acquire(int num_rstcs, struct reset_control_bulk_data *rs void reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs); struct reset_control *__of_reset_control_get(struct device_node *node, - const char *id, int index, bool shared, - bool optional, bool acquired); + const char *id, int index, enum reset_control_flags flags); struct reset_control *__reset_control_get(struct device *dev, const char *id, - int index, bool shared, - bool optional, bool acquired); + int index, enum reset_control_flags flags); void reset_control_put(struct reset_control *rstc); int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired); + enum reset_control_flags flags); void reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs); int __device_reset(struct device *dev, bool optional); struct reset_control *__devm_reset_control_get(struct device *dev, - const char *id, int index, bool shared, - bool optional, bool acquired); + const char *id, int index, enum reset_control_flags flags); int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired); + enum reset_control_flags flags); struct reset_control *devm_reset_control_array_get(struct device *dev, - bool shared, bool optional); -struct reset_control *of_reset_control_array_get(struct device_node *np, - bool shared, bool optional, - bool acquired); + enum reset_control_flags flags); +struct reset_control *of_reset_control_array_get(struct device_node *np, enum reset_control_flags); int reset_control_get_count(struct device *dev); @@ -116,17 +153,19 @@ static inline int __device_reset(struct device *dev, bool optional) static inline struct reset_control *__of_reset_control_get( struct device_node *node, - const char *id, int index, bool shared, - bool optional, bool acquired) + const char *id, int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__reset_control_get( struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } @@ -162,8 +201,10 @@ reset_control_bulk_release(int num_rstcs, struct reset_control_bulk_data *rstcs) static inline int __reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 0 : -EOPNOTSUPP; } @@ -174,30 +215,36 @@ reset_control_bulk_put(int num_rstcs, struct reset_control_bulk_data *rstcs) static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, - int index, bool shared, bool optional, - bool acquired) + int index, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline int __devm_reset_control_bulk_get(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs, - bool shared, bool optional, bool acquired) + enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? 0 : -EOPNOTSUPP; } static inline struct reset_control * -devm_reset_control_array_get(struct device *dev, bool shared, bool optional) +devm_reset_control_array_get(struct device *dev, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control * -of_reset_control_array_get(struct device_node *np, bool shared, bool optional, - bool acquired) +of_reset_control_array_get(struct device_node *np, enum reset_control_flags flags) { + bool optional = flags & RESET_CONTROL_FLAGS_BIT_OPTIONAL; + return optional ? NULL : ERR_PTR(-ENOTSUPP); } @@ -236,7 +283,7 @@ static inline int device_reset_optional(struct device *dev) static inline struct reset_control * __must_check reset_control_get_exclusive(struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, false, true); + return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -253,7 +300,7 @@ static inline int __must_check reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE); } /** @@ -274,7 +321,7 @@ static inline struct reset_control * __must_check reset_control_get_exclusive_released(struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, false, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -295,7 +342,7 @@ static inline int __must_check reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -316,7 +363,8 @@ static inline int __must_check reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -344,7 +392,7 @@ reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_r static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, true, false, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); } /** @@ -361,7 +409,7 @@ static inline int __must_check reset_control_bulk_get_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); } /** @@ -378,7 +426,7 @@ reset_control_bulk_get_shared(struct device *dev, int num_rstcs, static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, false, true, true); + return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -398,7 +446,7 @@ static inline int __must_check reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -415,7 +463,7 @@ reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __reset_control_get(dev, id, 0, true, true, false); + return __reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -435,7 +483,7 @@ static inline int __must_check reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); + return __reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -451,7 +499,7 @@ reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, false, false, true); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_EXCLUSIVE); } /** @@ -471,7 +519,7 @@ static inline struct reset_control *of_reset_control_get_exclusive( static inline struct reset_control *of_reset_control_get_optional_exclusive( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, false, true, true); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -496,7 +544,7 @@ static inline struct reset_control *of_reset_control_get_optional_exclusive( static inline struct reset_control *of_reset_control_get_shared( struct device_node *node, const char *id) { - return __of_reset_control_get(node, id, 0, true, false, false); + return __of_reset_control_get(node, id, 0, RESET_CONTROL_SHARED); } /** @@ -513,7 +561,7 @@ static inline struct reset_control *of_reset_control_get_shared( static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, false, false, true); + return __of_reset_control_get(node, NULL, index, RESET_CONTROL_EXCLUSIVE); } /** @@ -541,7 +589,7 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index( static inline struct reset_control *of_reset_control_get_shared_by_index( struct device_node *node, int index) { - return __of_reset_control_get(node, NULL, index, true, false, false); + return __of_reset_control_get(node, NULL, index, RESET_CONTROL_SHARED); } /** @@ -560,7 +608,26 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, false, true); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE); +} + +/** + * devm_reset_control_get_exclusive_deasserted - resource managed + * reset_control_get_exclusive() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_exclusive() + reset_control_deassert(). For reset + * controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See reset_control_get_exclusive() for more information. + */ +static inline struct reset_control * __must_check +devm_reset_control_get_exclusive_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_DEASSERTED); } /** @@ -580,7 +647,8 @@ static inline int __must_check devm_reset_control_bulk_get_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_EXCLUSIVE); } /** @@ -599,7 +667,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_exclusive_released(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, false, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -619,7 +687,8 @@ static inline int __must_check devm_reset_control_bulk_get_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, false, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_EXCLUSIVE_RELEASED); } /** @@ -638,7 +707,7 @@ static inline struct reset_control * __must_check devm_reset_control_get_optional_exclusive_released(struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, true, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -658,7 +727,8 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE_RELEASED); } /** @@ -673,7 +743,26 @@ devm_reset_control_bulk_get_optional_exclusive_released(struct device *dev, int static inline struct reset_control *devm_reset_control_get_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true, false, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED); +} + +/** + * devm_reset_control_get_shared_deasserted - resource managed + * reset_control_get_shared() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_shared() + reset_control_deassert(). For reset + * controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_shared() for more information. + */ +static inline struct reset_control * __must_check +devm_reset_control_get_shared_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_SHARED_DEASSERTED); } /** @@ -693,7 +782,29 @@ static inline int __must_check devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_SHARED); +} + +/** + * devm_reset_control_bulk_get_shared_deasserted - resource managed + * reset_control_bulk_get_shared() + + * reset_control_bulk_deassert() + * @dev: device to be reset by the controller + * @num_rstcs: number of entries in rstcs array + * @rstcs: array of struct reset_control_bulk_data with reset line names set + * + * Managed reset_control_bulk_get_shared() + reset_control_bulk_deassert(). For + * reset controllers returned from this function, reset_control_bulk_assert() + + * reset_control_bulk_put() are called automatically on driver detach. + * + * See devm_reset_control_bulk_get_shared() for more information. + */ +static inline int __must_check +devm_reset_control_bulk_get_shared_deasserted(struct device *dev, int num_rstcs, + struct reset_control_bulk_data *rstcs) +{ + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_SHARED_DEASSERTED); } /** @@ -711,7 +822,26 @@ devm_reset_control_bulk_get_shared(struct device *dev, int num_rstcs, static inline struct reset_control *devm_reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, false, true, true); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE); +} + +/** + * devm_reset_control_get_optional_exclusive_deasserted - resource managed + * reset_control_get_optional_exclusive() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_exclusive() + reset_control_deassert(). + * For reset controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_optional_exclusive() for more information. + */ +static inline struct reset_control * +devm_reset_control_get_optional_exclusive_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_EXCLUSIVE_DEASSERTED); } /** @@ -731,7 +861,8 @@ static inline int __must_check devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, + RESET_CONTROL_OPTIONAL_EXCLUSIVE); } /** @@ -749,7 +880,26 @@ devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs static inline struct reset_control *devm_reset_control_get_optional_shared( struct device *dev, const char *id) { - return __devm_reset_control_get(dev, id, 0, true, true, false); + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED); +} + +/** + * devm_reset_control_get_optional_shared_deasserted - resource managed + * reset_control_get_optional_shared() + + * reset_control_deassert() + * @dev: device to be reset by the controller + * @id: reset line name + * + * Managed reset_control_get_optional_shared() + reset_control_deassert(). For + * reset controllers returned from this function, reset_control_assert() + + * reset_control_put() is called automatically on driver detach. + * + * See devm_reset_control_get_optional_shared() for more information. + */ +static inline struct reset_control * +devm_reset_control_get_optional_shared_deasserted(struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, RESET_CONTROL_OPTIONAL_SHARED_DEASSERTED); } /** @@ -769,7 +919,7 @@ static inline int __must_check devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, struct reset_control_bulk_data *rstcs) { - return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, true, false); + return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, RESET_CONTROL_OPTIONAL_SHARED); } /** @@ -787,7 +937,7 @@ devm_reset_control_bulk_get_optional_shared(struct device *dev, int num_rstcs, static inline struct reset_control * devm_reset_control_get_exclusive_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, false, false, true); + return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_EXCLUSIVE); } /** @@ -803,7 +953,7 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index) static inline struct reset_control * devm_reset_control_get_shared_by_index(struct device *dev, int index) { - return __devm_reset_control_get(dev, NULL, index, true, false, false); + return __devm_reset_control_get(dev, NULL, index, RESET_CONTROL_SHARED); } /* @@ -851,54 +1001,54 @@ static inline struct reset_control *devm_reset_control_get_by_index( static inline struct reset_control * devm_reset_control_array_get_exclusive(struct device *dev) { - return devm_reset_control_array_get(dev, false, false); + return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * devm_reset_control_array_get_shared(struct device *dev) { - return devm_reset_control_array_get(dev, true, false); + return devm_reset_control_array_get(dev, RESET_CONTROL_SHARED); } static inline struct reset_control * devm_reset_control_array_get_optional_exclusive(struct device *dev) { - return devm_reset_control_array_get(dev, false, true); + return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * devm_reset_control_array_get_optional_shared(struct device *dev) { - return devm_reset_control_array_get(dev, true, true); + return devm_reset_control_array_get(dev, RESET_CONTROL_OPTIONAL_SHARED); } static inline struct reset_control * of_reset_control_array_get_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, false, false, true); + return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_exclusive_released(struct device_node *node) { - return of_reset_control_array_get(node, false, false, false); + return of_reset_control_array_get(node, RESET_CONTROL_EXCLUSIVE_RELEASED); } static inline struct reset_control * of_reset_control_array_get_shared(struct device_node *node) { - return of_reset_control_array_get(node, true, false, true); + return of_reset_control_array_get(node, RESET_CONTROL_SHARED); } static inline struct reset_control * of_reset_control_array_get_optional_exclusive(struct device_node *node) { - return of_reset_control_array_get(node, false, true, true); + return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_EXCLUSIVE); } static inline struct reset_control * of_reset_control_array_get_optional_shared(struct device_node *node) { - return of_reset_control_array_get(node, true, true, true); + return of_reset_control_array_get(node, RESET_CONTROL_OPTIONAL_SHARED); } #endif diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 373003ace639..997b34197385 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -147,7 +147,8 @@ void rfkill_destroy(struct rfkill *rfkill); * Prefer to use rfkill_set_hw_state if you don't need any special reason. */ bool rfkill_set_hw_state_reason(struct rfkill *rfkill, - bool blocked, unsigned long reason); + bool blocked, + enum rfkill_hard_block_reasons reason); /** * rfkill_set_hw_state - Set the internal rfkill hardware block state * @rfkill: pointer to the rfkill class to modify. @@ -280,7 +281,7 @@ static inline void rfkill_destroy(struct rfkill *rfkill) static inline bool rfkill_set_hw_state_reason(struct rfkill *rfkill, bool blocked, - unsigned long reason) + enum rfkill_hard_block_reasons reason) { return blocked; } diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fd35d4ec12e1..17fbb7855295 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -89,6 +89,14 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer, struct trace_buffer * __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); +struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, + int order, unsigned long start, + unsigned long range_size, + struct lock_class_key *key); + +bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, + long *data); + /* * Because the ring buffer is generic, if other users of the ring buffer get * traced by ftrace, it can produce lockdep warnings. We need to keep each @@ -100,6 +108,18 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc_range(size, flags, order, start, range_size) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc_range((size), (flags), (order), (start), \ + (range_size), &__key); \ +}) + typedef bool (*ring_buffer_cond_fn)(void *data); int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, ring_buffer_cond_fn cond, void *data); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 0978c64f49d8..683a04088f3f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -171,7 +171,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, unlink_anon_vmas(next); } -struct anon_vma *folio_get_anon_vma(struct folio *folio); +struct anon_vma *folio_get_anon_vma(const struct folio *folio); /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -194,8 +194,8 @@ enum rmap_level { RMAP_LEVEL_PMD, }; -static inline void __folio_rmap_sanity_checks(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) +static inline void __folio_rmap_sanity_checks(const struct folio *folio, + const struct page *page, int nr_pages, enum rmap_level level) { /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); @@ -331,7 +331,7 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, switch (level) { case RMAP_LEVEL_PTE: if (!folio_test_large(folio)) { - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } @@ -425,7 +425,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, if (!folio_test_large(folio)) { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + atomic_inc(&folio->_mapcount); break; } @@ -728,11 +728,8 @@ page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw) } bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); - -/* - * Used by swapoff to help locate where page is expected in vma. - */ -unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +unsigned long page_address_in_vma(const struct folio *folio, + const struct page *, const struct vm_area_struct *); /* * Cleans the PTEs of shared mappings. @@ -745,7 +742,12 @@ int folio_mkclean(struct folio *); int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); -void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); +enum rmp_flags { + RMP_LOCKED = 1 << 0, + RMP_USE_SHARED_ZEROPAGE = 1 << 1, +}; + +void remove_migration_ptes(struct folio *src, struct folio *dst, int flags); /* * rmap_walk_control: To control rmap traversing for specific needs @@ -769,14 +771,14 @@ struct rmap_walk_control { bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct folio *folio); - struct anon_vma *(*anon_lock)(struct folio *folio, + struct anon_vma *(*anon_lock)(const struct folio *folio, struct rmap_walk_control *rwc); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc); void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); -struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, +struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ diff --git a/include/linux/rpmb.h b/include/linux/rpmb.h new file mode 100644 index 000000000000..cccda73eea4d --- /dev/null +++ b/include/linux/rpmb.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Intel Corp. All rights reserved + * Copyright (C) 2021-2022 Linaro Ltd + */ +#ifndef __RPMB_H__ +#define __RPMB_H__ + +#include <linux/device.h> +#include <linux/types.h> + +/** + * enum rpmb_type - type of underlying storage technology + * + * @RPMB_TYPE_EMMC : emmc (JESD84-B50.1) + * @RPMB_TYPE_UFS : UFS (JESD220) + * @RPMB_TYPE_NVME : NVM Express + */ +enum rpmb_type { + RPMB_TYPE_EMMC, + RPMB_TYPE_UFS, + RPMB_TYPE_NVME, +}; + +/** + * struct rpmb_descr - RPMB description provided by the underlying block device + * + * @type : block device type + * @route_frames : routes frames to and from the RPMB device + * @dev_id : unique device identifier read from the hardware + * @dev_id_len : length of unique device identifier + * @reliable_wr_count: number of sectors that can be written in one access + * @capacity : capacity of the device in units of 128K + * + * @dev_id is intended to be used as input when deriving the authenticaion key. + */ +struct rpmb_descr { + enum rpmb_type type; + int (*route_frames)(struct device *dev, u8 *req, unsigned int req_len, + u8 *resp, unsigned int resp_len); + u8 *dev_id; + size_t dev_id_len; + u16 reliable_wr_count; + u16 capacity; +}; + +/** + * struct rpmb_dev - device which can support RPMB partition + * + * @dev : device + * @id : device_id + * @list_node : linked list node + * @descr : RPMB description + */ +struct rpmb_dev { + struct device dev; + int id; + struct list_head list_node; + struct rpmb_descr descr; +}; + +#define to_rpmb_dev(x) container_of((x), struct rpmb_dev, dev) + +#if IS_ENABLED(CONFIG_RPMB) +struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev); +void rpmb_dev_put(struct rpmb_dev *rdev); +struct rpmb_dev *rpmb_dev_find_device(const void *data, + const struct rpmb_dev *start, + int (*match)(struct device *dev, + const void *data)); +int rpmb_interface_register(struct class_interface *intf); +void rpmb_interface_unregister(struct class_interface *intf); +struct rpmb_dev *rpmb_dev_register(struct device *dev, + struct rpmb_descr *descr); +int rpmb_dev_unregister(struct rpmb_dev *rdev); + +int rpmb_route_frames(struct rpmb_dev *rdev, u8 *req, + unsigned int req_len, u8 *resp, unsigned int resp_len); + +#else +static inline struct rpmb_dev *rpmb_dev_get(struct rpmb_dev *rdev) +{ + return NULL; +} + +static inline void rpmb_dev_put(struct rpmb_dev *rdev) { } + +static inline struct rpmb_dev * +rpmb_dev_find_device(const void *data, const struct rpmb_dev *start, + int (*match)(struct device *dev, const void *data)) +{ + return NULL; +} + +static inline int rpmb_interface_register(struct class_interface *intf) +{ + return -EOPNOTSUPP; +} + +static inline void rpmb_interface_unregister(struct class_interface *intf) +{ +} + +static inline struct rpmb_dev * +rpmb_dev_register(struct device *dev, struct rpmb_descr *descr) +{ + return NULL; +} + +static inline int rpmb_dev_unregister(struct rpmb_dev *dev) +{ + return 0; +} + +static inline int rpmb_route_frames(struct rpmb_dev *rdev, u8 *req, + unsigned int req_len, u8 *resp, + unsigned int resp_len) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_RPMB */ + +#endif /* __RPMB_H__ */ diff --git a/include/linux/rtc/m48t59.h b/include/linux/rtc/m48t59.h index 9465d5405fe2..373ba77071c6 100644 --- a/include/linux/rtc/m48t59.h +++ b/include/linux/rtc/m48t59.h @@ -56,6 +56,9 @@ struct m48t59_plat_data { void __iomem *ioaddr; /* offset to RTC registers, automatically set according to the type */ unsigned int offset; + + /* YY digits (in RTC) are offset, i.e. year is 1900 + yy_offset + YY */ + int yy_offset; }; #endif /* _LINUX_RTC_M48T59_H_ */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a7da7dfc06a2..14b88f551920 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,7 +7,6 @@ #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> -#include <linux/cleanup.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -47,13 +46,15 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); -DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock()) - extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; +#define ASSERT_RTNL() \ + WARN_ONCE(!rtnl_is_locked(), \ + "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) + #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else @@ -95,6 +96,61 @@ static inline bool lockdep_rtnl_is_held(void) #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net); +void __rtnl_net_unlock(struct net *net); +void rtnl_net_lock(struct net *net); +void rtnl_net_unlock(struct net *net); +int rtnl_net_trylock(struct net *net); +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); + +bool rtnl_net_is_locked(struct net *net); + +#define ASSERT_RTNL_NET(net) \ + WARN_ONCE(!rtnl_net_is_locked(net), \ + "RTNL_NET: assertion failed at %s (%d)\n", \ + __FILE__, __LINE__) + +bool lockdep_rtnl_net_is_held(struct net *net); + +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) +#define rtnl_net_dereference(net, p) \ + rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) +#else +static inline void __rtnl_net_lock(struct net *net) {} +static inline void __rtnl_net_unlock(struct net *net) {} + +static inline void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); +} + +static inline void rtnl_net_unlock(struct net *net) +{ + rtnl_unlock(); +} + +static inline int rtnl_net_trylock(struct net *net) +{ + return rtnl_trylock(); +} + +static inline void ASSERT_RTNL_NET(struct net *net) +{ + ASSERT_RTNL(); +} + +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_rtnl(p) +#define rtnl_net_dereference(net, p) \ + rtnl_dereference(p) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer_rtnl(rp, p) +#endif + static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); @@ -122,10 +178,6 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); -#define ASSERT_RTNL() \ - WARN_ONCE(!rtnl_is_locked(), \ - "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) - extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/include/linux/rtsx_common.h b/include/linux/rtsx_common.h index bf290ad14c57..da9c8c6b5d50 100644 --- a/include/linux/rtsx_common.h +++ b/include/linux/rtsx_common.h @@ -12,7 +12,6 @@ #define DRV_NAME_RTSX_PCI "rtsx_pci" #define DRV_NAME_RTSX_PCI_SDMMC "rtsx_pci_sdmmc" -#define DRV_NAME_RTSX_PCI_MS "rtsx_pci_ms" #define RTSX_REG_PAIR(addr, val) (((u32)(addr) << 16) | (u8)(val)) diff --git a/include/linux/rtsx_usb.h b/include/linux/rtsx_usb.h index 3247ed8e9ff0..f267a06c6b1e 100644 --- a/include/linux/rtsx_usb.h +++ b/include/linux/rtsx_usb.h @@ -12,6 +12,10 @@ #include <linux/usb.h> +#define DRV_NAME_RTSX_USB "rtsx_usb" +#define DRV_NAME_RTSX_USB_SDMMC "rtsx_usb_sdmmc" +#define DRV_NAME_RTSX_USB_MS "rtsx_usb_ms" + /* related module names */ #define RTSX_USB_SD_CARD 0 #define RTSX_USB_MS_CARD 1 diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index c0ef596f340b..5b87c6f4a243 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -2,7 +2,7 @@ #define __LINUX_RWLOCK_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index dceb0a59b692..31d3d1116323 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -2,7 +2,7 @@ #define __LINUX_RWLOCK_API_SMP_H #ifndef __LINUX_SPINLOCK_API_SMP_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index 8544ff05e594..7d81fc6918ee 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -24,13 +24,13 @@ do { \ __rt_rwlock_init(rwl, #rwl, &__key); \ } while (0) -extern void rt_read_lock(rwlock_t *rwlock); +extern void rt_read_lock(rwlock_t *rwlock) __acquires(rwlock); extern int rt_read_trylock(rwlock_t *rwlock); -extern void rt_read_unlock(rwlock_t *rwlock); -extern void rt_write_lock(rwlock_t *rwlock); -extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass); +extern void rt_read_unlock(rwlock_t *rwlock) __releases(rwlock); +extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock); +extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock); extern int rt_write_trylock(rwlock_t *rwlock); -extern void rt_write_unlock(rwlock_t *rwlock); +extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock); static __always_inline void read_lock(rwlock_t *rwlock) { diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index c09cdcc99471..189140bf11fc 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -40,7 +40,7 @@ struct sbitmap_word { /** * @swap_lock: serializes simultaneous updates of ->word and ->cleared */ - spinlock_t swap_lock; + raw_spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index e61d164622db..d836e7440ee8 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -273,7 +273,7 @@ static inline void sg_unmark_end(struct scatterlist *sg) } /* - * One 64-bit architectures there is a 4-byte padding in struct scatterlist + * On 64-bit architectures there is a 4-byte padding in struct scatterlist * (assuming also CONFIG_NEED_SG_DMA_LENGTH is set). Use this padding for DMA * flags bits to indicate when a specific dma address is a bus address or the * buffer may have been bounced via SWIOTLB. @@ -313,7 +313,7 @@ static inline void sg_dma_mark_bus_address(struct scatterlist *sg) } /** - * sg_unmark_bus_address - Unmark the scatterlist entry as a bus address + * sg_dma_unmark_bus_address - Unmark the scatterlist entry as a bus address * @sg: SG entry * * Description: diff --git a/include/linux/sched.h b/include/linux/sched.h index f8d150343d42..66b311fbd5d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -82,6 +82,8 @@ struct task_group; struct task_struct; struct user_event_mm; +#include <linux/sched/ext.h> + /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). @@ -149,8 +151,9 @@ struct user_event_mm; * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). */ -#define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) +#define is_special_task_state(state) \ + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \ + TASK_DEAD | TASK_FROZEN)) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ @@ -541,9 +544,14 @@ struct sched_entity { struct rb_node run_node; u64 deadline; u64 min_vruntime; + u64 min_slice; struct list_head group_node; - unsigned int on_rq; + unsigned char on_rq; + unsigned char sched_delayed; + unsigned char rel_deadline; + unsigned char custom_slice; + /* hole */ u64 exec_start; u64 sum_exec_runtime; @@ -639,12 +647,33 @@ struct sched_dl_entity { * * @dl_overrun tells if the task asked to be informed about runtime * overruns. + * + * @dl_server tells if this is a server entity. + * + * @dl_defer tells if this is a deferred or regular server. For + * now only defer server exists. + * + * @dl_defer_armed tells if the deferrable server is waiting + * for the replenishment timer to activate it. + * + * @dl_server_active tells if the dlserver is active(started). + * dlserver is started on first cfs enqueue on an idle runqueue + * and is stopped when a dequeue results in 0 cfs tasks on the + * runqueue. In other words, dlserver is active only when cpu's + * runqueue has atleast one cfs task. + * + * @dl_defer_running tells if the deferrable server is actually + * running, skipping the defer phase. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; + unsigned int dl_server_active : 1; + unsigned int dl_defer : 1; + unsigned int dl_defer_armed : 1; + unsigned int dl_defer_running : 1; /* * Bandwidth enforcement timer. Each -deadline task has its @@ -672,7 +701,7 @@ struct sched_dl_entity { */ struct rq *rq; dl_server_has_tasks_f server_has_tasks; - dl_server_pick_f server_pick; + dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES /* @@ -810,6 +839,9 @@ struct task_struct { struct sched_rt_entity rt; struct sched_dl_entity dl; struct sched_dl_entity *dl_server; +#ifdef CONFIG_SCHED_CLASS_EXT + struct sched_ext_entity scx; +#endif const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE @@ -976,7 +1008,7 @@ struct task_struct { #ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif -#ifdef CONFIG_CPU_SUP_INTEL +#ifdef CONFIG_X86_BUS_LOCK_DETECT unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT @@ -1096,9 +1128,12 @@ struct task_struct { /* * executable name, excluding path. * - * - normally initialized setup_new_exec() - * - access it with [gs]et_task_comm() - * - lock it with task_lock() + * - normally initialized begin_new_exec() + * - set it with set_task_comm() + * - strscpy_pad() to ensure it is always NUL-terminated and + * zero-padded + * - task_lock() to ensure the operation is atomic and the name is + * fully updated. */ char comm[TASK_COMM_LEN]; @@ -1243,7 +1278,6 @@ struct task_struct { /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; - int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ @@ -1417,6 +1451,7 @@ struct task_struct { /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; + unsigned long long ftrace_sleeptime; /* * Number of functions that haven't been traced @@ -1657,8 +1692,8 @@ extern struct pid *cad_pid; * I am cleaning dirty pages from some other bdi. */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ -#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ +#define PF__HOLE__00800000 0x00800000 +#define PF__HOLE__01000000 0x01000000 #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ @@ -1874,7 +1909,7 @@ extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)]; #ifdef CONFIG_THREAD_INFO_IN_TASK # define task_thread_info(task) (&(task)->thread_info) -#elif !defined(__HAVE_THREAD_FUNCTIONS) +#else # define task_thread_info(task) ((struct thread_info *)(task)->stack) #endif @@ -1914,10 +1949,23 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from) __set_task_comm(tsk, from, false); } -extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); +/* + * - Why not use task_lock()? + * User space can randomly change their names anyway, so locking for readers + * doesn't make sense. For writers, locking is probably necessary, as a race + * condition could lead to long-term mixed results. + * The strscpy_pad() in __set_task_comm() can ensure that the task comm is + * always NUL-terminated and zero-padded. Therefore the race condition between + * reader and writer is not an issue. + * + * - BUILD_BUG_ON() can help prevent the buf from being truncated. + * Since the callers don't perform any return value checks, this safeguard is + * necessary. + */ #define get_task_comm(buf, tsk) ({ \ - BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ - __get_task_comm(buf, sizeof(buf), tsk); \ + BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN); \ + strscpy_pad(buf, (tsk)->comm); \ + buf; \ }) #ifdef CONFIG_SMP @@ -1978,7 +2026,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk) static inline void clear_tsk_need_resched(struct task_struct *tsk) { - clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED); + atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY, + (atomic_long_t *)&task_thread_info(tsk)->flags); } static inline int test_tsk_need_resched(struct task_struct *tsk) @@ -2109,6 +2158,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +static inline bool task_is_runnable(struct task_struct *p) +{ + return p->on_rq && !p->se.sched_delayed; +} + extern bool sched_task_on_rq(struct task_struct *p); extern unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_curr_snapshot(int cpu); diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index e62ff805cfc9..6eb65ceed213 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -8,12 +8,6 @@ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ -/* mm flags */ - -/* for SUID_DUMP_* above */ -#define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) - extern void set_dumpable(struct mm_struct *mm, int value); /* * This returns the actual value of the suid_dumpable flag. For things @@ -31,80 +25,4 @@ static inline int get_dumpable(struct mm_struct *mm) return __get_dumpable(mm->flags); } -/* coredump filter bits */ -#define MMF_DUMP_ANON_PRIVATE 2 -#define MMF_DUMP_ANON_SHARED 3 -#define MMF_DUMP_MAPPED_PRIVATE 4 -#define MMF_DUMP_MAPPED_SHARED 5 -#define MMF_DUMP_ELF_HEADERS 6 -#define MMF_DUMP_HUGETLB_PRIVATE 7 -#define MMF_DUMP_HUGETLB_SHARED 8 -#define MMF_DUMP_DAX_PRIVATE 9 -#define MMF_DUMP_DAX_SHARED 10 - -#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 9 -#define MMF_DUMP_FILTER_MASK \ - (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) -#define MMF_DUMP_FILTER_DEFAULT \ - ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\ - (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF) - -#ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS -# define MMF_DUMP_MASK_DEFAULT_ELF (1 << MMF_DUMP_ELF_HEADERS) -#else -# define MMF_DUMP_MASK_DEFAULT_ELF 0 -#endif - /* leave room for more dump flags */ -#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ -#define MMF_VM_HUGEPAGE 17 /* set when mm is available for - khugepaged */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ - -#define MMF_HAS_UPROBES 19 /* has uprobes */ -#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ -#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ -#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ -#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) -#define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ -#define MMF_MULTIPROCESS 26 /* mm is shared between processes */ -/* - * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either - * replaced in the future by mm.pinned_vm when it becomes stable, or grow into - * a counter on its own. We're aggresive on this bit for now: even if the - * pinned pages were unpinned later on, we'll still keep this bit set for the - * lifecycle of this mm, just for simplicity. - */ -#define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ - -#define MMF_HAS_MDWE 28 -#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) - - -#define MMF_HAS_MDWE_NO_INHERIT 29 - -#define MMF_VM_MERGE_ANY 30 -#define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) - -#define MMF_TOPDOWN 31 /* mm searches top down by default */ -#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) - -#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ - MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) - -static inline unsigned long mmf_init_flags(unsigned long flags) -{ - if (flags & (1UL << MMF_HAS_MDWE_NO_INHERIT)) - flags &= ~((1UL << MMF_HAS_MDWE) | - (1UL << MMF_HAS_MDWE_NO_INHERIT)); - return flags & MMF_INIT_MASK; -} - #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index df3aca89d4f5..3a912ab42bb5 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -10,16 +10,16 @@ #include <linux/sched.h> -#define MAX_DL_PRIO 0 - -static inline int dl_prio(int prio) +static inline bool dl_prio(int prio) { - if (unlikely(prio < MAX_DL_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_DL_PRIO); } -static inline int dl_task(struct task_struct *p) +/* + * Returns true if a task has a priority that belongs to DL class. PI-boosted + * tasks will return true. Use dl_policy() to ignore PI-boosted tasks. + */ +static inline bool dl_task(struct task_struct *p) { return dl_prio(p->prio); } diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h new file mode 100644 index 000000000000..1d70a9867fb1 --- /dev/null +++ b/include/linux/sched/ext.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst + * + * Copyright (c) 2022 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2022 Tejun Heo <tj@kernel.org> + * Copyright (c) 2022 David Vernet <dvernet@meta.com> + */ +#ifndef _LINUX_SCHED_EXT_H +#define _LINUX_SCHED_EXT_H + +#ifdef CONFIG_SCHED_CLASS_EXT + +#include <linux/llist.h> +#include <linux/rhashtable-types.h> + +enum scx_public_consts { + SCX_OPS_NAME_LEN = 128, + + SCX_SLICE_DFL = 20 * 1000000, /* 20ms */ + SCX_SLICE_INF = U64_MAX, /* infinite, implies nohz */ +}; + +/* + * DSQ (dispatch queue) IDs are 64bit of the format: + * + * Bits: [63] [62 .. 0] + * [ B] [ ID ] + * + * B: 1 for IDs for built-in DSQs, 0 for ops-created user DSQs + * ID: 63 bit ID + * + * Built-in IDs: + * + * Bits: [63] [62] [61..32] [31 .. 0] + * [ 1] [ L] [ R ] [ V ] + * + * 1: 1 for built-in DSQs. + * L: 1 for LOCAL_ON DSQ IDs, 0 for others + * V: For LOCAL_ON DSQ IDs, a CPU number. For others, a pre-defined value. + */ +enum scx_dsq_id_flags { + SCX_DSQ_FLAG_BUILTIN = 1LLU << 63, + SCX_DSQ_FLAG_LOCAL_ON = 1LLU << 62, + + SCX_DSQ_INVALID = SCX_DSQ_FLAG_BUILTIN | 0, + SCX_DSQ_GLOBAL = SCX_DSQ_FLAG_BUILTIN | 1, + SCX_DSQ_LOCAL = SCX_DSQ_FLAG_BUILTIN | 2, + SCX_DSQ_LOCAL_ON = SCX_DSQ_FLAG_BUILTIN | SCX_DSQ_FLAG_LOCAL_ON, + SCX_DSQ_LOCAL_CPU_MASK = 0xffffffffLLU, +}; + +/* + * A dispatch queue (DSQ) can be either a FIFO or p->scx.dsq_vtime ordered + * queue. A built-in DSQ is always a FIFO. The built-in local DSQs are used to + * buffer between the scheduler core and the BPF scheduler. See the + * documentation for more details. + */ +struct scx_dispatch_q { + raw_spinlock_t lock; + struct list_head list; /* tasks in dispatch order */ + struct rb_root priq; /* used to order by p->scx.dsq_vtime */ + u32 nr; + u32 seq; /* used by BPF iter */ + u64 id; + struct rhash_head hash_node; + struct llist_node free_node; + struct rcu_head rcu; +}; + +/* scx_entity.flags */ +enum scx_ent_flags { + SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ + SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */ + SCX_TASK_DEQD_FOR_SLEEP = 1 << 3, /* last dequeue was for SLEEP */ + + SCX_TASK_STATE_SHIFT = 8, /* bit 8 and 9 are used to carry scx_task_state */ + SCX_TASK_STATE_BITS = 2, + SCX_TASK_STATE_MASK = ((1 << SCX_TASK_STATE_BITS) - 1) << SCX_TASK_STATE_SHIFT, + + SCX_TASK_CURSOR = 1 << 31, /* iteration cursor, not a task */ +}; + +/* scx_entity.flags & SCX_TASK_STATE_MASK */ +enum scx_task_state { + SCX_TASK_NONE, /* ops.init_task() not called yet */ + SCX_TASK_INIT, /* ops.init_task() succeeded, but task can be cancelled */ + SCX_TASK_READY, /* fully initialized, but not in sched_ext */ + SCX_TASK_ENABLED, /* fully initialized and in sched_ext */ + + SCX_TASK_NR_STATES, +}; + +/* scx_entity.dsq_flags */ +enum scx_ent_dsq_flags { + SCX_TASK_DSQ_ON_PRIQ = 1 << 0, /* task is queued on the priority queue of a dsq */ +}; + +/* + * Mask bits for scx_entity.kf_mask. Not all kfuncs can be called from + * everywhere and the following bits track which kfunc sets are currently + * allowed for %current. This simple per-task tracking works because SCX ops + * nest in a limited way. BPF will likely implement a way to allow and disallow + * kfuncs depending on the calling context which will replace this manual + * mechanism. See scx_kf_allow(). + */ +enum scx_kf_mask { + SCX_KF_UNLOCKED = 0, /* sleepable and not rq locked */ + /* ENQUEUE and DISPATCH may be nested inside CPU_RELEASE */ + SCX_KF_CPU_RELEASE = 1 << 0, /* ops.cpu_release() */ + /* ops.dequeue (in REST) may be nested inside DISPATCH */ + SCX_KF_DISPATCH = 1 << 1, /* ops.dispatch() */ + SCX_KF_ENQUEUE = 1 << 2, /* ops.enqueue() and ops.select_cpu() */ + SCX_KF_SELECT_CPU = 1 << 3, /* ops.select_cpu() */ + SCX_KF_REST = 1 << 4, /* other rq-locked operations */ + + __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | + SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, + __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, +}; + +enum scx_dsq_lnode_flags { + SCX_DSQ_LNODE_ITER_CURSOR = 1 << 0, + + /* high 16 bits can be for iter cursor flags */ + __SCX_DSQ_LNODE_PRIV_SHIFT = 16, +}; + +struct scx_dsq_list_node { + struct list_head node; + u32 flags; + u32 priv; /* can be used by iter cursor */ +}; + +/* + * The following is embedded in task_struct and contains all fields necessary + * for a task to be scheduled by SCX. + */ +struct sched_ext_entity { + struct scx_dispatch_q *dsq; + struct scx_dsq_list_node dsq_list; /* dispatch order */ + struct rb_node dsq_priq; /* p->scx.dsq_vtime order */ + u32 dsq_seq; + u32 dsq_flags; /* protected by DSQ lock */ + u32 flags; /* protected by rq lock */ + u32 weight; + s32 sticky_cpu; + s32 holding_cpu; + u32 kf_mask; /* see scx_kf_mask above */ + struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ + atomic_long_t ops_state; + + struct list_head runnable_node; /* rq->scx.runnable_list */ + unsigned long runnable_at; + +#ifdef CONFIG_SCHED_CORE + u64 core_sched_at; /* see scx_prio_less() */ +#endif + u64 ddsp_dsq_id; + u64 ddsp_enq_flags; + + /* BPF scheduler modifiable fields */ + + /* + * Runtime budget in nsecs. This is usually set through + * scx_bpf_dispatch() but can also be modified directly by the BPF + * scheduler. Automatically decreased by SCX as the task executes. On + * depletion, a scheduling event is triggered. + * + * This value is cleared to zero if the task is preempted by + * %SCX_KICK_PREEMPT and shouldn't be used to determine how long the + * task ran. Use p->se.sum_exec_runtime instead. + */ + u64 slice; + + /* + * Used to order tasks when dispatching to the vtime-ordered priority + * queue of a dsq. This is usually set through scx_bpf_dispatch_vtime() + * but can also be modified directly by the BPF scheduler. Modifying it + * while a task is queued on a dsq may mangle the ordering and is not + * recommended. + */ + u64 dsq_vtime; + + /* + * If set, reject future sched_setscheduler(2) calls updating the policy + * to %SCHED_EXT with -%EACCES. + * + * Can be set from ops.init_task() while the BPF scheduler is being + * loaded (!scx_init_task_args->fork). If set and the task's policy is + * already %SCHED_EXT, the task's policy is rejected and forcefully + * reverted to %SCHED_NORMAL. The number of such events are reported + * through /sys/kernel/debug/sched_ext::nr_rejected. Setting this flag + * during fork is not allowed. + */ + bool disallow; /* reject switching into SCX */ + + /* cold fields */ +#ifdef CONFIG_EXT_GROUP_SCHED + struct cgroup *cgrp_moving_from; +#endif + struct list_head tasks_node; +}; + +void sched_ext_free(struct task_struct *p); +void print_scx_info(const char *log_lvl, struct task_struct *p); +void scx_softlockup(u32 dur_s); + +#else /* !CONFIG_SCHED_CLASS_EXT */ + +static inline void sched_ext_free(struct task_struct *p) {} +static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} +static inline void scx_softlockup(u32 dur_s) {} + +#endif /* CONFIG_SCHED_CLASS_EXT */ +#endif /* _LINUX_SCHED_EXT_H */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 91546493c43d..928a626725e6 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -179,27 +179,20 @@ static inline void mm_update_next_owner(struct mm_struct *mm) extern void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack); -extern unsigned long -arch_get_unmapped_area(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); -extern unsigned long + +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags); +unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t); unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -unsigned long -arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags, vm_flags_t vm_flags); -unsigned long -arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags, vm_flags_t); - unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, struct file *filp, unsigned long addr, @@ -211,11 +204,11 @@ unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long flags, vm_flags_t vm_flags); unsigned long generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, - unsigned long flags); + unsigned long flags, vm_flags_t vm_flags); #else static inline void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) {} @@ -258,25 +251,16 @@ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | - PF_MEMALLOC_NOFS | - PF_MEMALLOC_NORECLAIM | - PF_MEMALLOC_NOWARN | - PF_MEMALLOC_PIN))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { /* - * Stronger flags before weaker flags: - * NORECLAIM implies NOIO, which in turn implies NOFS + * NOIO implies both NOIO and NOFS and it is a weaker context + * so always make sure it makes precedence */ - if (pflags & PF_MEMALLOC_NORECLAIM) - flags &= ~__GFP_DIRECT_RECLAIM; - else if (pflags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; - if (pflags & PF_MEMALLOC_NOWARN) - flags |= __GFP_NOWARN; - if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index ab83d85e1183..6ab43b4f72f9 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -14,6 +14,7 @@ */ #define MAX_RT_PRIO 100 +#define MAX_DL_PRIO 0 #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index b2b9e6eb9683..4e3338103654 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -6,19 +6,40 @@ struct task_struct; -static inline int rt_prio(int prio) +static inline bool rt_prio(int prio) { - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; + return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO); } -static inline int rt_task(struct task_struct *p) +static inline bool rt_or_dl_prio(int prio) +{ + return unlikely(prio < MAX_RT_PRIO); +} + +/* + * Returns true if a task has a priority that belongs to RT class. PI-boosted + * tasks will return true. Use rt_policy() to ignore PI-boosted tasks. + */ +static inline bool rt_task(struct task_struct *p) { return rt_prio(p->prio); } -static inline bool task_is_realtime(struct task_struct *tsk) +/* + * Returns true if a task has a priority that belongs to RT or DL classes. + * PI-boosted tasks will return true. Use rt_or_dl_task_policy() to ignore + * PI-boosted tasks. + */ +static inline bool rt_or_dl_task(struct task_struct *p) +{ + return rt_or_dl_prio(p->prio); +} + +/* + * Returns true if a task has a policy that belongs to RT or DL classes. + * PI-boosted tasks will return false. + */ +static inline bool rt_or_dl_task_policy(struct task_struct *tsk) { int policy = tsk->policy; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0a0e23c45406..d5d03d919df8 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -137,7 +137,8 @@ struct signal_struct { /* POSIX.1b Interval Timers */ unsigned int next_posix_timer_id; - struct list_head posix_timers; + struct hlist_head posix_timers; + struct hlist_head ignored_posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; @@ -276,8 +277,7 @@ static inline void signal_set_stop_flags(struct signal_struct *sig, extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *task, sigset_t *mask, - kernel_siginfo_t *info, enum pid_type *type); +extern int dequeue_signal(sigset_t *mask, kernel_siginfo_t *info, enum pid_type *type); static inline int kernel_dequeue_signal(void) { @@ -287,7 +287,7 @@ static inline int kernel_dequeue_signal(void) int ret; spin_lock_irq(&task->sighand->siglock); - ret = dequeue_signal(task, &task->blocked, &__info, &__type); + ret = dequeue_signal(&task->blocked, &__info, &__type); spin_unlock_irq(&task->sighand->siglock); return ret; @@ -339,9 +339,6 @@ extern void force_fatal_sig(int); extern void force_exit_sig(int); extern int send_sig(int, struct task_struct *, int); extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue *sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type); extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); static inline void clear_notify_signal(void) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index d362aacf9f89..0f2aeb37bbb0 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -63,7 +63,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern int sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs); +extern void sched_cancel_fork(struct task_struct *p); extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); @@ -119,6 +120,11 @@ static inline struct task_struct *get_task_struct(struct task_struct *t) return t; } +static inline struct task_struct *tryget_task_struct(struct task_struct *t) +{ + return refcount_inc_not_zero(&t->usage) ? t : NULL; +} + extern void __put_task_struct(struct task_struct *t); extern void __put_task_struct_rcu_cb(struct rcu_head *rhp); diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index ccd72b978e1f..cffad65bdc6a 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/magic.h> #include <linux/refcount.h> +#include <linux/kasan.h> #ifdef CONFIG_THREAD_INFO_IN_TASK @@ -33,7 +34,7 @@ static __always_inline unsigned long *end_of_stack(const struct task_struct *tas #endif } -#elif !defined(__HAVE_THREAD_FUNCTIONS) +#else #define task_stack_page(task) ((void *)(task)->stack) @@ -89,29 +90,18 @@ static inline int object_is_on_stack(const void *obj) { void *stack = task_stack_page(current); + obj = kasan_reset_tag(obj); return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE +unsigned long stack_not_used(struct task_struct *p); +#else static inline unsigned long stack_not_used(struct task_struct *p) { - unsigned long *n = end_of_stack(p); - - do { /* Skip over canary */ -# ifdef CONFIG_STACK_GROWSUP - n--; -# else - n++; -# endif - } while (!*n); - -# ifdef CONFIG_STACK_GROWSUP - return (unsigned long)end_of_stack(p) - (unsigned long)n; -# else - return (unsigned long)n - (unsigned long)end_of_stack(p); -# endif + return 0; } #endif extern void set_task_stack_end_magic(struct task_struct *tsk); diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h new file mode 100644 index 000000000000..066216f1357a --- /dev/null +++ b/include/linux/scmi_imx_protocol.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * SCMI Message Protocol driver NXP extension header + * + * Copyright 2024 NXP. + */ + +#ifndef _LINUX_SCMI_NXP_PROTOCOL_H +#define _LINUX_SCMI_NXP_PROTOCOL_H + +#include <linux/bitfield.h> +#include <linux/device.h> +#include <linux/notifier.h> +#include <linux/types.h> + +enum scmi_nxp_protocol { + SCMI_PROTOCOL_IMX_BBM = 0x81, + SCMI_PROTOCOL_IMX_MISC = 0x84, +}; + +struct scmi_imx_bbm_proto_ops { + int (*rtc_time_set)(const struct scmi_protocol_handle *ph, u32 id, + uint64_t sec); + int (*rtc_time_get)(const struct scmi_protocol_handle *ph, u32 id, + u64 *val); + int (*rtc_alarm_set)(const struct scmi_protocol_handle *ph, u32 id, + bool enable, u64 sec); + int (*button_get)(const struct scmi_protocol_handle *ph, u32 *state); +}; + +enum scmi_nxp_notification_events { + SCMI_EVENT_IMX_BBM_RTC = 0x0, + SCMI_EVENT_IMX_BBM_BUTTON = 0x1, + SCMI_EVENT_IMX_MISC_CONTROL = 0x0, +}; + +struct scmi_imx_bbm_notif_report { + bool is_rtc; + bool is_button; + ktime_t timestamp; + unsigned int rtc_id; + unsigned int rtc_evt; +}; + +struct scmi_imx_misc_ctrl_notify_report { + ktime_t timestamp; + unsigned int ctrl_id; + unsigned int flags; +}; + +struct scmi_imx_misc_proto_ops { + int (*misc_ctrl_set)(const struct scmi_protocol_handle *ph, u32 id, + u32 num, u32 *val); + int (*misc_ctrl_get)(const struct scmi_protocol_handle *ph, u32 id, + u32 *num, u32 *val); + int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph, + u32 ctrl_id, u32 evt_id, u32 flags); +}; +#endif diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 709ad84809e1..341980599c71 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -32,6 +32,11 @@ static inline int secure_computing(void) } #else extern void secure_computing_strict(int this_syscall); +static inline int __secure_computing(const struct seccomp_data *sd) +{ + secure_computing_strict(sd->nr); + return 0; +} #endif extern long prctl_get_seccomp(void); diff --git a/include/linux/security.h b/include/linux/security.h index 1390f1efb4f0..cbdba435b798 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -34,6 +34,10 @@ #include <linux/sockptr.h> #include <linux/bpf.h> #include <uapi/linux/lsm.h> +#include <linux/lsm/selinux.h> +#include <linux/lsm/smack.h> +#include <linux/lsm/apparmor.h> +#include <linux/lsm/bpf.h> struct linux_binprm; struct cred; @@ -83,6 +87,18 @@ enum lsm_event { LSM_POLICY_CHANGE, }; +struct dm_verity_digest { + const char *alg; + const u8 *digest; + size_t digest_len; +}; + +enum lsm_integrity_type { + LSM_INT_DMVERITY_SIG_VALID, + LSM_INT_DMVERITY_ROOTHASH, + LSM_INT_FSVERITY_BUILTINSIG_VALID, +}; + /* * These are reasons that can be passed to the security_locked_down() * LSM hook. Lockdown reasons that protect kernel integrity (ie, the @@ -140,6 +156,16 @@ enum lockdown_reason { LOCKDOWN_CONFIDENTIALITY_MAX, }; +/* + * Data exported by the security modules + */ +struct lsm_prop { + struct lsm_prop_selinux selinux; + struct lsm_prop_smack smack; + struct lsm_prop_apparmor apparmor; + struct lsm_prop_bpf bpf; +}; + extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1]; extern u32 lsm_active_cnt; extern const struct lsm_id *lsm_idlist[]; @@ -257,8 +283,32 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id) return kernel_load_data_str[id]; } +/** + * lsmprop_init - initialize a lsm_prop structure + * @prop: Pointer to the data to initialize + * + * Set all secid for all modules to the specified value. + */ +static inline void lsmprop_init(struct lsm_prop *prop) +{ + memset(prop, 0, sizeof(*prop)); +} + #ifdef CONFIG_SECURITY +/** + * lsmprop_is_set - report if there is a value in the lsm_prop + * @prop: Pointer to the exported LSM data + * + * Returns true if there is a value set, false otherwise + */ +static inline bool lsmprop_is_set(struct lsm_prop *prop) +{ + const struct lsm_prop empty = {}; + + return !!memcmp(prop, &empty, sizeof(*prop)); +} + int call_blocking_lsm_notifier(enum lsm_event event, void *data); int register_blocking_lsm_notifier(struct notifier_block *nb); int unregister_blocking_lsm_notifier(struct notifier_block *nb); @@ -336,7 +386,7 @@ int security_dentry_create_files_as(struct dentry *dentry, int mode, struct cred *new); int security_path_notify(const struct path *path, u64 mask, unsigned int obj_type); -int security_inode_alloc(struct inode *inode); +int security_inode_alloc(struct inode *inode, gfp_t gfp); void security_inode_free(struct inode *inode); int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, @@ -396,9 +446,12 @@ int security_inode_getsecurity(struct mnt_idmap *idmap, void **buffer, bool alloc); int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags); int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); -void security_inode_getsecid(struct inode *inode, u32 *secid); +void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop); int security_inode_copy_up(struct dentry *src, struct cred **new); int security_inode_copy_up_xattr(struct dentry *src, const char *name); +int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, const void *value, + size_t size); int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); @@ -429,6 +482,7 @@ void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_transfer_creds(struct cred *new, const struct cred *old); void security_cred_getsecid(const struct cred *c, u32 *secid); +void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_kernel_module_request(char *kmod_name); @@ -448,8 +502,8 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); int security_task_getsid(struct task_struct *p); -void security_current_getsecid_subj(u32 *secid); -void security_task_getsecid_obj(struct task_struct *p, u32 *secid); +void security_current_getlsmprop_subj(struct lsm_prop *prop); +void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); @@ -467,7 +521,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_create_user_ns(const struct cred *cred); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); -void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); +void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop); int security_msg_msg_alloc(struct msg_msg *msg); void security_msg_msg_free(struct msg_msg *msg); int security_msg_queue_alloc(struct kern_ipc_perm *msq); @@ -500,6 +554,7 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); +int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); void security_inode_invalidate_secctx(struct inode *inode); @@ -509,8 +564,24 @@ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); +int security_bdev_alloc(struct block_device *bdev); +void security_bdev_free(struct block_device *bdev); +int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, const void *value, + size_t size); #else /* CONFIG_SECURITY */ +/** + * lsmprop_is_set - report if there is a value in the lsm_prop + * @prop: Pointer to the exported LSM data + * + * Returns true if there is a value set, false otherwise + */ +static inline bool lsmprop_is_set(struct lsm_prop *prop) +{ + return false; +} + static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) { return 0; @@ -634,7 +705,7 @@ static inline int security_settime64(const struct timespec64 *ts, static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages)); + return __vm_enough_memory(mm, pages, !cap_vm_enough_memory(mm, pages)); } static inline int security_bprm_creds_for_exec(struct linux_binprm *bprm) @@ -769,7 +840,7 @@ static inline int security_path_notify(const struct path *path, u64 mask, return 0; } -static inline int security_inode_alloc(struct inode *inode) +static inline int security_inode_alloc(struct inode *inode, gfp_t gfp) { return 0; } @@ -1000,9 +1071,10 @@ static inline int security_inode_listsecurity(struct inode *inode, char *buffer, return 0; } -static inline void security_inode_getsecid(struct inode *inode, u32 *secid) +static inline void security_inode_getlsmprop(struct inode *inode, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_inode_copy_up(struct dentry *src, struct cred **new) @@ -1010,6 +1082,13 @@ static inline int security_inode_copy_up(struct dentry *src, struct cred **new) return 0; } +static inline int security_inode_setintegrity(const struct inode *inode, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + static inline int security_kernfs_init_security(struct kernfs_node *kn_dir, struct kernfs_node *kn) { @@ -1145,6 +1224,10 @@ static inline void security_cred_getsecid(const struct cred *c, u32 *secid) *secid = 0; } +static inline void security_cred_getlsmprop(const struct cred *c, + struct lsm_prop *prop) +{ } + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; @@ -1222,14 +1305,15 @@ static inline int security_task_getsid(struct task_struct *p) return 0; } -static inline void security_current_getsecid_subj(u32 *secid) +static inline void security_current_getlsmprop_subj(struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } -static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid) +static inline void security_task_getlsmprop_obj(struct task_struct *p, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_task_setnice(struct task_struct *p, int nice) @@ -1305,9 +1389,10 @@ static inline int security_ipc_permission(struct kern_ipc_perm *ipcp, return 0; } -static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid) +static inline void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, + struct lsm_prop *prop) { - *secid = 0; + lsmprop_init(prop); } static inline int security_msg_msg_alloc(struct msg_msg *msg) @@ -1441,7 +1526,14 @@ static inline int security_ismaclabel(const char *name) return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, char **secdata, + u32 *seclen) +{ + return -EOPNOTSUPP; +} + +static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, + char **secdata, u32 *seclen) { return -EOPNOTSUPP; } @@ -1483,6 +1575,23 @@ static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, { return -EOPNOTSUPP; } + +static inline int security_bdev_alloc(struct block_device *bdev) +{ + return 0; +} + +static inline void security_bdev_free(struct block_device *bdev) +{ +} + +static inline int security_bdev_setintegrity(struct block_device *bdev, + enum lsm_integrity_type type, + const void *value, size_t size) +{ + return 0; +} + #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) @@ -2051,7 +2160,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); -int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); +int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, + void *lsmrule); void security_audit_rule_free(void *lsmrule); #else @@ -2067,8 +2177,8 @@ static inline int security_audit_rule_known(struct audit_krule *krule) return 0; } -static inline int security_audit_rule_match(u32 secid, u32 field, u32 op, - void *lsmrule) +static inline int security_audit_rule_match(struct lsm_prop *prop, u32 field, + u32 op, void *lsmrule) { return 0; } @@ -2090,6 +2200,7 @@ struct dentry *securityfs_create_symlink(const char *name, const char *target, const struct inode_operations *iops); extern void securityfs_remove(struct dentry *dentry); +extern void securityfs_recursive_remove(struct dentry *dentry); #else /* CONFIG_SECURITYFS */ @@ -2137,7 +2248,7 @@ extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, struct bpf_token *token); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path); + const struct path *path); extern void security_bpf_token_free(struct bpf_token *token); extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); extern int security_bpf_token_capable(const struct bpf_token *token, int cap); @@ -2177,7 +2288,7 @@ static inline void security_bpf_prog_free(struct bpf_prog *prog) { } static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, - struct path *path) + const struct path *path) { return 0; } @@ -2256,4 +2367,12 @@ static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ +#ifdef CONFIG_SECURITY +extern void security_initramfs_populated(void); +#else +static inline void security_initramfs_populated(void) +{ +} +#endif /* CONFIG_SECURITY */ + #endif /* ! __LINUX_SECURITY_H */ diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 2ac50822554e..80f33a93f944 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_GET_GEOMETRY: case IOC_OPAL_DISCOVERY: case IOC_OPAL_REVERT_LSP: + case IOC_OPAL_SET_SID_PW: return true; } return false; diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d90d8ee29d81..5298765d6ca4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -157,7 +157,7 @@ __seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ static __always_inline unsigned \ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ { \ - unsigned seq = READ_ONCE(s->seqcount.sequence); \ + unsigned seq = smp_load_acquire(&s->seqcount.sequence); \ \ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ return seq; \ @@ -170,7 +170,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ * Re-read the sequence counter since the (possibly \ * preempted) writer made progress. \ */ \ - seq = READ_ONCE(s->seqcount.sequence); \ + seq = smp_load_acquire(&s->seqcount.sequence); \ } \ \ return seq; \ @@ -208,7 +208,7 @@ static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) static inline unsigned __seqprop_sequence(const seqcount_t *s) { - return READ_ONCE(s->sequence); + return smp_load_acquire(&s->sequence); } static inline bool __seqprop_preemptible(const seqcount_t *s) @@ -263,17 +263,9 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #define seqprop_assert(s) __seqprop(s, assert)(s) /** - * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * __read_seqcount_begin() - begin a seqcount_t read section * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants * - * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() - * barrier. Callers should ensure that smp_rmb() or equivalent ordering is - * provided before actually loading any of the variables that are to be - * protected in this critical section. - * - * Use carefully, only in critical code, and comment how the barrier is - * provided. - * * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ @@ -293,13 +285,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * * Return: count to be passed to read_seqcount_retry() */ -#define raw_read_seqcount_begin(s) \ -({ \ - unsigned _seq = __read_seqcount_begin(s); \ - \ - smp_rmb(); \ - _seq; \ -}) +#define raw_read_seqcount_begin(s) __read_seqcount_begin(s) /** * read_seqcount_begin() - begin a seqcount_t read critical section @@ -328,7 +314,6 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) ({ \ unsigned __seq = seqprop_sequence(s); \ \ - smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ __seq; \ }) @@ -637,6 +622,23 @@ static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t * } /** + * read_seqcount_latch() - pick even/odd latch data copy + * @s: Pointer to seqcount_latch_t + * + * See write_seqcount_latch() for details and a full reader/writer usage + * example. + * + * Return: sequence counter raw value. Use the lowest bit as an index for + * picking which data copy to read. The full counter must then be checked + * with read_seqcount_latch_retry(). + */ +static __always_inline unsigned read_seqcount_latch(const seqcount_latch_t *s) +{ + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); + return raw_read_seqcount_latch(s); +} + +/** * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section * @s: Pointer to seqcount_latch_t * @start: count, from raw_read_seqcount_latch() @@ -651,8 +653,33 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) } /** + * read_seqcount_latch_retry() - end a seqcount_latch_t read section + * @s: Pointer to seqcount_latch_t + * @start: count, from read_seqcount_latch() + * + * Return: true if a read section retry is required, else false + */ +static __always_inline int +read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) +{ + kcsan_atomic_next(0); + return raw_read_seqcount_latch_retry(s, start); +} + +/** * raw_write_seqcount_latch() - redirect latch readers to even/odd copy * @s: Pointer to seqcount_latch_t + */ +static __always_inline void raw_write_seqcount_latch(seqcount_latch_t *s) +{ + smp_wmb(); /* prior stores before incrementing "sequence" */ + s->seqcount.sequence++; + smp_wmb(); /* increment "sequence" before following stores */ +} + +/** + * write_seqcount_latch_begin() - redirect latch readers to odd copy + * @s: Pointer to seqcount_latch_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -680,17 +707,11 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * * void latch_modify(struct latch_struct *latch, ...) * { - * smp_wmb(); // Ensure that the last data[1] update is visible - * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * + * write_seqcount_latch_begin(&latch->seq); * modify(latch->data[0], ...); - * - * smp_wmb(); // Ensure that the data[0] update is visible - * latch->seq.sequence++; - * smp_wmb(); // Ensure that the seqcount update is visible - * + * write_seqcount_latch(&latch->seq); * modify(latch->data[1], ...); + * write_seqcount_latch_end(&latch->seq); * } * * The query will have a form like:: @@ -701,13 +722,13 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * unsigned seq, idx; * * do { - * seq = raw_read_seqcount_latch(&latch->seq); + * seq = read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * // This includes needed smp_rmb() - * } while (raw_read_seqcount_latch_retry(&latch->seq, seq)); + * } while (read_seqcount_latch_retry(&latch->seq, seq)); * * return entry; * } @@ -731,11 +752,31 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. */ -static inline void raw_write_seqcount_latch(seqcount_latch_t *s) +static __always_inline void write_seqcount_latch_begin(seqcount_latch_t *s) { - smp_wmb(); /* prior stores before incrementing "sequence" */ - s->seqcount.sequence++; - smp_wmb(); /* increment "sequence" before following stores */ + kcsan_nestable_atomic_begin(); + raw_write_seqcount_latch(s); +} + +/** + * write_seqcount_latch() - redirect latch readers to even copy + * @s: Pointer to seqcount_latch_t + */ +static __always_inline void write_seqcount_latch(seqcount_latch_t *s) +{ + raw_write_seqcount_latch(s); +} + +/** + * write_seqcount_latch_end() - end a seqcount_latch_t write section + * @s: Pointer to seqcount_latch_t + * + * Marks the end of a seqcount_latch_t writer section, after all copies of the + * latch-protected data have been updated. + */ +static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s) +{ + kcsan_nestable_atomic_end(); } #define __SEQLOCK_UNLOCKED(lockname) \ @@ -769,11 +810,7 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s) */ static inline unsigned read_seqbegin(const seqlock_t *sl) { - unsigned ret = read_seqcount_begin(&sl->seqcount); - - kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ - kcsan_flat_atomic_begin(); - return ret; + return read_seqcount_begin(&sl->seqcount); } /** @@ -789,12 +826,6 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { - /* - * Assume not nested: read_seqretry() may be called multiple times when - * completing read critical section. - */ - kcsan_flat_atomic_end(); - return read_seqcount_retry(&sl->seqcount, start); } diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index fd59ed2cca53..e0717c8393d7 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -193,7 +193,7 @@ void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl); void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud, - unsigned int quot, unsigned int quot_frac); + unsigned int quot); int fsl8250_handle_irq(struct uart_port *port); int serial8250_handle_irq(struct uart_port *port, unsigned int iir); u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index aea25eef9a1a..743b4afaad4c 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -11,6 +11,8 @@ #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> +#include <linux/lockdep.h> +#include <linux/printk.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> @@ -503,7 +505,11 @@ struct uart_port { * The remaining bits are serial-core specific and not modifiable by * userspace. */ +#ifdef CONFIG_HAS_IOPORT #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) +#else +#define UPF_FOURPORT 0 +#endif #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) @@ -590,6 +596,95 @@ struct uart_port { void *private_data; /* generic platform data pointer */ }; +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) +{ + spin_lock_irqsave(&up->lock, *flags); +} + +/* + * Only for console->device_lock()/_unlock() callbacks and internal + * port lock wrapper synchronization. + */ +static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) +{ + spin_unlock_irqrestore(&up->lock, flags); +} + +/** + * uart_port_set_cons - Safely set the @cons field for a uart + * @up: The uart port to set + * @con: The new console to set to + * + * This function must be used to set @up->cons. It uses the port lock to + * synchronize with the port lock wrappers in order to ensure that the console + * cannot change or disappear while another context is holding the port lock. + */ +static inline void uart_port_set_cons(struct uart_port *up, struct console *con) +{ + unsigned long flags; + + __uart_port_lock_irqsave(up, &flags); + up->cons = con; + __uart_port_unlock_irqrestore(up, flags); +} + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_using_nbcon(struct uart_port *up) +{ + lockdep_assert_held_once(&up->lock); + + if (likely(!uart_console(up))) + return false; + + /* + * @up->cons is only modified under the port lock. Therefore it is + * certain that it cannot disappear here. + * + * @up->cons->node is added/removed from the console list under the + * port lock. Therefore it is certain that the registration status + * cannot change here, thus @up->cons->flags can be read directly. + */ + if (hlist_unhashed_lockless(&up->cons->node) || + !(up->cons->flags & CON_NBCON) || + !up->cons->write_atomic) { + return false; + } + + return true; +} + +/* Only for internal port lock wrapper usage. */ +static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return true; + + return nbcon_device_try_acquire(up->cons); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_acquire(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + while (!nbcon_device_try_acquire(up->cons)) + cpu_relax(); +} + +/* Only for internal port lock wrapper usage. */ +static inline void __uart_port_nbcon_release(struct uart_port *up) +{ + if (!__uart_port_using_nbcon(up)) + return; + + nbcon_device_release(up->cons); +} + /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure @@ -597,6 +692,7 @@ struct uart_port { static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -606,6 +702,7 @@ static inline void uart_port_lock(struct uart_port *up) static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); + __uart_port_nbcon_acquire(up); } /** @@ -616,6 +713,7 @@ static inline void uart_port_lock_irq(struct uart_port *up) static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); + __uart_port_nbcon_acquire(up); } /** @@ -626,7 +724,15 @@ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f */ static inline bool uart_port_trylock(struct uart_port *up) { - return spin_trylock(&up->lock); + if (!spin_trylock(&up->lock)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock(&up->lock); + return false; + } + + return true; } /** @@ -638,7 +744,15 @@ static inline bool uart_port_trylock(struct uart_port *up) */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { - return spin_trylock_irqsave(&up->lock, *flags); + if (!spin_trylock_irqsave(&up->lock, *flags)) + return false; + + if (!__uart_port_nbcon_try_acquire(up)) { + spin_unlock_irqrestore(&up->lock, *flags); + return false; + } + + return true; } /** @@ -647,6 +761,7 @@ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long */ static inline void uart_port_unlock(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock(&up->lock); } @@ -656,6 +771,7 @@ static inline void uart_port_unlock(struct uart_port *up) */ static inline void uart_port_unlock_irq(struct uart_port *up) { + __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } @@ -666,6 +782,7 @@ static inline void uart_port_unlock_irq(struct uart_port *up) */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { + __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h index 1672cf0810ef..102aa33d956c 100644 --- a/include/linux/serial_s3c.h +++ b/include/linux/serial_s3c.h @@ -246,24 +246,28 @@ S5PV210_UFCON_TXTRIG4 | \ S5PV210_UFCON_RXTRIG4) -#define APPLE_S5L_UCON_RXTO_ENA 9 -#define APPLE_S5L_UCON_RXTHRESH_ENA 12 -#define APPLE_S5L_UCON_TXTHRESH_ENA 13 -#define APPLE_S5L_UCON_RXTO_ENA_MSK (1 << APPLE_S5L_UCON_RXTO_ENA) -#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_RXTHRESH_ENA) -#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_TXTHRESH_ENA) +#define APPLE_S5L_UCON_RXTO_ENA 9 +#define APPLE_S5L_UCON_RXTO_LEGACY_ENA 11 +#define APPLE_S5L_UCON_RXTHRESH_ENA 12 +#define APPLE_S5L_UCON_TXTHRESH_ENA 13 +#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA) +#define APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_LEGACY_ENA) +#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA) +#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA) #define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \ S3C2410_UCON_RXIRQMODE | \ S3C2410_UCON_RXFIFO_TOI) #define APPLE_S5L_UCON_MASK (APPLE_S5L_UCON_RXTO_ENA_MSK | \ + APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK | \ APPLE_S5L_UCON_RXTHRESH_ENA_MSK | \ APPLE_S5L_UCON_TXTHRESH_ENA_MSK) -#define APPLE_S5L_UTRSTAT_RXTHRESH (1<<4) -#define APPLE_S5L_UTRSTAT_TXTHRESH (1<<5) -#define APPLE_S5L_UTRSTAT_RXTO (1<<9) -#define APPLE_S5L_UTRSTAT_ALL_FLAGS (0x3f0) +#define APPLE_S5L_UTRSTAT_RXTO_LEGACY BIT(3) +#define APPLE_S5L_UTRSTAT_RXTHRESH BIT(4) +#define APPLE_S5L_UTRSTAT_TXTHRESH BIT(5) +#define APPLE_S5L_UTRSTAT_RXTO BIT(9) +#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 3) #ifndef __ASSEMBLY__ diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h index 95ac8398ee72..3030d9245f5a 100644 --- a/include/linux/set_memory.h +++ b/include/linux/set_memory.h @@ -8,10 +8,10 @@ #ifdef CONFIG_ARCH_HAS_SET_MEMORY #include <asm/set_memory.h> #else -static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } -static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int __must_check set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifndef set_memory_rox @@ -34,6 +34,12 @@ static inline int set_direct_map_default_noflush(struct page *page) return 0; } +static inline int set_direct_map_valid_noflush(struct page *page, + unsigned nr, bool valid) +{ + return 0; +} + static inline bool kernel_page_present(struct page *page) { return true; diff --git a/include/linux/sfp.h b/include/linux/sfp.h index b14be59550e3..60c65cea74f6 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -550,7 +550,7 @@ struct sfp_upstream_ops { void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); - void (*disconnect_phy)(void *priv); + void (*disconnect_phy)(void *priv, struct phy_device *); }; #if IS_ENABLED(CONFIG_SFP) @@ -576,6 +576,7 @@ struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); +const char *sfp_get_name(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, @@ -654,6 +655,11 @@ static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, static inline void sfp_bus_del_upstream(struct sfp_bus *bus) { } + +static inline const char *sfp_get_name(struct sfp_bus *bus) +{ + return NULL; +} #endif #endif diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 1d06b1e5408a..0b273a7b9f01 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -42,10 +42,10 @@ struct shmem_inode_info { struct inode vfs_inode; }; -#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE +#define SHMEM_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | FS_CASEFOLD_FL) #define SHMEM_FL_USER_MODIFIABLE \ - (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL) -#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL) + (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL) +#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL) struct shmem_quota_limits { qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */ @@ -111,23 +111,22 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags); unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool global_huge); + loff_t write_end, bool shmem_huge_force); +bool shmem_hpage_pmd_enabled(void); #else -static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, - struct mm_struct *mm, unsigned long vm_flags) -{ - return false; -} static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, struct vm_area_struct *vma, pgoff_t index, - bool global_huge) + loff_t write_end, bool shmem_huge_force) { return 0; } + +static inline bool shmem_hpage_pmd_enabled(void) +{ + return false; +} #endif #ifdef CONFIG_SHMEM @@ -150,8 +149,8 @@ enum sgp_type { SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ }; -int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, - enum sgp_type sgp); +int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end, + struct folio **foliop, enum sgp_type sgp); struct folio *shmem_read_folio_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 29c3ea5b6e93..58009fa66102 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -31,6 +31,7 @@ #include <linux/in6.h> #include <linux/if_packet.h> #include <linux/llist.h> +#include <linux/page_frag_cache.h> #include <net/flow.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> @@ -827,6 +828,8 @@ enum skb_tstamp_type { * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @unreadable: indicates that at least 1 of the fragments in this skb is + * unreadable. * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -1008,7 +1011,7 @@ struct sk_buff { #if IS_ENABLED(CONFIG_IP_SCTP) __u8 csum_not_inet:1; #endif - + __u8 unreadable:1; #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) __u16 tc_index; /* traffic control index */ #endif @@ -1225,7 +1228,7 @@ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; - if (likely(refcount_read(&skb->users) == 1)) + if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; @@ -1433,6 +1436,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); @@ -1823,6 +1827,12 @@ static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) __skb_zcopy_downgrade_managed(skb); } +/* Return true if frags in this skb are readable by the host. */ +static inline bool skb_frags_readable(const struct sk_buff *skb) +{ + return !skb->unreadable; +} + static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; @@ -2539,10 +2549,17 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, netmem_ref netmem, int off, int size) { - struct page *page = netmem_to_page(netmem); + struct page *page; __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); + if (netmem_is_net_iov(netmem)) { + skb->unreadable = true; + return; + } + + page = netmem_to_page(netmem); + /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). @@ -2665,6 +2682,12 @@ static inline void skb_assert_len(struct sk_buff *skb) #endif /* CONFIG_DEBUG_NET */ } +#if defined(CONFIG_FAIL_SKB_REALLOC) +void skb_might_realloc(struct sk_buff *skb); +#else +static inline void skb_might_realloc(struct sk_buff *skb) {} +#endif + /* * Add data to an sk_buff */ @@ -2765,6 +2788,7 @@ static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_might_realloc(skb); if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -2893,9 +2917,19 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb) skb->inner_transport_header = skb->transport_header; } +static inline int skb_mac_header_was_set(const struct sk_buff *skb) +{ + return skb->mac_header != (typeof(skb->mac_header))~0U; +} + static inline void skb_reset_mac_len(struct sk_buff *skb) { - skb->mac_len = skb->network_header - skb->mac_header; + if (!skb_mac_header_was_set(skb)) { + DEBUG_NET_WARN_ON_ONCE(1); + skb->mac_len = 0; + } else { + skb->mac_len = skb->network_header - skb->mac_header; + } } static inline unsigned char *skb_inner_transport_header(const struct sk_buff @@ -2911,7 +2945,10 @@ static inline int skb_inner_transport_offset(const struct sk_buff *skb) static inline void skb_reset_inner_transport_header(struct sk_buff *skb) { - skb->inner_transport_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_transport_header))offset); + skb->inner_transport_header = offset; } static inline void skb_set_inner_transport_header(struct sk_buff *skb, @@ -2928,7 +2965,10 @@ static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) static inline void skb_reset_inner_network_header(struct sk_buff *skb) { - skb->inner_network_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_network_header))offset); + skb->inner_network_header = offset; } static inline void skb_set_inner_network_header(struct sk_buff *skb, @@ -2950,7 +2990,10 @@ static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) static inline void skb_reset_inner_mac_header(struct sk_buff *skb) { - skb->inner_mac_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_mac_header))offset); + skb->inner_mac_header = offset; } static inline void skb_set_inner_mac_header(struct sk_buff *skb, @@ -2972,7 +3015,10 @@ static inline unsigned char *skb_transport_header(const struct sk_buff *skb) static inline void skb_reset_transport_header(struct sk_buff *skb) { - skb->transport_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->transport_header))offset); + skb->transport_header = offset; } static inline void skb_set_transport_header(struct sk_buff *skb, @@ -2989,7 +3035,10 @@ static inline unsigned char *skb_network_header(const struct sk_buff *skb) static inline void skb_reset_network_header(struct sk_buff *skb) { - skb->network_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->network_header))offset); + skb->network_header = offset; } static inline void skb_set_network_header(struct sk_buff *skb, const int offset) @@ -2998,11 +3047,6 @@ static inline void skb_set_network_header(struct sk_buff *skb, const int offset) skb->network_header += offset; } -static inline int skb_mac_header_was_set(const struct sk_buff *skb) -{ - return skb->mac_header != (typeof(skb->mac_header))~0U; -} - static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); @@ -3027,7 +3071,10 @@ static inline void skb_unset_mac_header(struct sk_buff *skb) static inline void skb_reset_mac_header(struct sk_buff *skb) { - skb->mac_header = skb->data - skb->head; + long offset = skb->data - skb->head; + + DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->mac_header))offset); + skb->mac_header = offset; } static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) @@ -3114,9 +3161,15 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb) return skb_inner_network_header(skb) - skb->data; } +static inline enum skb_drop_reason +pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len) +{ + return pskb_may_pull_reason(skb, skb_network_offset(skb) + len); +} + static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) { - return pskb_may_pull(skb, skb_network_offset(skb) + len); + return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } /* @@ -3194,6 +3247,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) static inline int pskb_trim(struct sk_buff *skb, unsigned int len) { + skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; } @@ -3523,21 +3577,58 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, fragto->offset = fragfrom->offset; } +/* Return: true if the skb_frag contains a net_iov. */ +static inline bool skb_frag_is_net_iov(const skb_frag_t *frag) +{ + return netmem_is_net_iov(frag->netmem); +} + +/** + * skb_frag_net_iov - retrieve the net_iov referred to by fragment + * @frag: the fragment + * + * Return: the &struct net_iov associated with @frag. Returns NULL if this + * frag has no associated net_iov. + */ +static inline struct net_iov *skb_frag_net_iov(const skb_frag_t *frag) +{ + if (!skb_frag_is_net_iov(frag)) + return NULL; + + return netmem_to_net_iov(frag->netmem); +} + /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * - * Returns the &struct page associated with @frag. + * Return: the &struct page associated with @frag. Returns NULL if this frag + * has no associated page. */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { + if (skb_frag_is_net_iov(frag)) + return NULL; + return netmem_to_page(frag->netmem); } +/** + * skb_frag_netmem - retrieve the netmem referred to by a fragment + * @frag: the fragment + * + * Return: the &netmem_ref associated with @frag. + */ +static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) +{ + return frag->netmem; +} + int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, struct bpf_prog *prog); + /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer @@ -3547,6 +3638,9 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, */ static inline void *skb_frag_address(const skb_frag_t *frag) { + if (!skb_frag_page(frag)) + return NULL; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } @@ -3908,6 +4002,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { + skb_might_realloc(skb); if (likely(len >= skb->len)) return 0; return pskb_trim_rcsum_slow(skb, len); diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 16c241a23472..0f3c58007488 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -34,14 +34,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) bool napi_pp_put_page(netmem_ref netmem); -static inline void -skb_page_unref(struct page *page, bool recycle) +static inline void skb_page_unref(netmem_ref netmem, bool recycle) { #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page_to_netmem(page))) + if (recycle && napi_pp_put_page(netmem)) return; #endif - put_page(page); + put_page(netmem_to_page(netmem)); } /** @@ -54,7 +53,7 @@ skb_page_unref(struct page *page, bool recycle) */ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) { - skb_page_unref(skb_frag_page(frag), recycle); + skb_page_unref(skb_frag_netmem(frag), recycle); } /** diff --git a/include/linux/slab.h b/include/linux/slab.h index eb2bf4629157..10a971c2bde3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -77,7 +77,17 @@ enum _slab_flag_bits { #define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ #define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) -/* Align objs on cache lines */ +/** + * define SLAB_HWCACHE_ALIGN - Align objects on cache line boundaries. + * + * Sufficiently large objects are aligned on cache line boundary. For object + * size smaller than a half of cache line size, the alignment is on the half of + * cache line size. In general, if object size is smaller than 1/2^n of cache + * line size, the alignment is adjusted to 1/2^n. + * + * If explicit alignment is also requested by the respective + * &struct kmem_cache_args field, the greater of both is alignments is applied. + */ #define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) @@ -87,8 +97,8 @@ enum _slab_flag_bits { #define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ #define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) -/* - * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! +/** + * define SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() @@ -99,20 +109,22 @@ enum _slab_flag_bits { * stays valid, the trick to using this is relying on an independent * object validation pass. Something like: * - * begin: - * rcu_read_lock(); - * obj = lockless_lookup(key); - * if (obj) { - * if (!try_get_ref(obj)) // might fail for free objects - * rcu_read_unlock(); - * goto begin; - * - * if (obj->key != key) { // not the object we expected - * put_ref(obj); - * rcu_read_unlock(); - * goto begin; - * } - * } + * :: + * + * begin: + * rcu_read_lock(); + * obj = lockless_lookup(key); + * if (obj) { + * if (!try_get_ref(obj)) // might fail for free objects + * rcu_read_unlock(); + * goto begin; + * + * if (obj->key != key) { // not the object we expected + * put_ref(obj); + * rcu_read_unlock(); + * goto begin; + * } + * } * rcu_read_unlock(); * * This is useful if we need to approach a kernel structure obliquely, @@ -137,7 +149,6 @@ enum _slab_flag_bits { * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -/* Defer freeing slabs to RCU */ #define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ #define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) @@ -170,7 +181,12 @@ enum _slab_flag_bits { #else # define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif -/* Account to memcg */ +/** + * define SLAB_ACCOUNT - Account allocations to memcg. + * + * All object allocations from this cache will be memcg accounted, regardless of + * __GFP_ACCOUNT being or not being passed to individual allocations. + */ #ifdef CONFIG_MEMCG # define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else @@ -197,7 +213,13 @@ enum _slab_flag_bits { #endif /* The following flags affect the page allocator grouping pages by mobility */ -/* Objects are reclaimable */ +/** + * define SLAB_RECLAIM_ACCOUNT - Objects are reclaimable. + * + * Use this flag for caches that have an associated shrinker. As a result, slab + * pages are allocated with __GFP_RECLAIMABLE, which affects grouping pages by + * mobility, and are accounted in SReclaimable counter in /proc/meminfo + */ #ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else @@ -213,6 +235,12 @@ enum _slab_flag_bits { #endif /* + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. + */ +typedef struct { unsigned long v; } freeptr_t; + +/* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. @@ -234,14 +262,173 @@ struct mem_cgroup; */ bool slab_is_available(void); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, slab_flags_t flags, - void (*ctor)(void *)); -struct kmem_cache *kmem_cache_create_usercopy(const char *name, - unsigned int size, unsigned int align, - slab_flags_t flags, - unsigned int useroffset, unsigned int usersize, - void (*ctor)(void *)); +/** + * struct kmem_cache_args - Less common arguments for kmem_cache_create() + * + * Any uninitialized fields of the structure are interpreted as unused. The + * exception is @freeptr_offset where %0 is a valid value, so + * @use_freeptr_offset must be also set to %true in order to interpret the field + * as used. For @useroffset %0 is also valid, but only with non-%0 + * @usersize. + * + * When %NULL args is passed to kmem_cache_create(), it is equivalent to all + * fields unused. + */ +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + +struct kmem_cache *__kmem_cache_create_args(const char *name, + unsigned int object_size, + struct kmem_cache_args *args, + slab_flags_t flags); +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + slab_flags_t flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +/** + * kmem_cache_create_usercopy - Create a kmem cache with a region suitable + * for copying to userspace. + * @name: A string which is used in /proc/slabinfo to identify this cache. + * @size: The size of objects to be created in this cache. + * @align: The required alignment for the objects. + * @flags: SLAB flags + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size + * @ctor: A constructor for the objects, or %NULL. + * + * This is a legacy wrapper, new code should use either KMEM_CACHE_USERCOPY() + * if whitelisting a single field is sufficient, or kmem_cache_create() with + * the necessary parameters passed via the args parameter (see + * &struct kmem_cache_args) + * + * Return: a pointer to the cache on success, NULL on failure. + */ +static inline struct kmem_cache * +kmem_cache_create_usercopy(const char *name, unsigned int size, + unsigned int align, slab_flags_t flags, + unsigned int useroffset, unsigned int usersize, + void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + .useroffset = useroffset, + .usersize = usersize, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, + slab_flags_t flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + /* Make sure we don't get passed garbage. */ + if (WARN_ON_ONCE(args)) + return ERR_PTR(-EINVAL); + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +/** + * kmem_cache_create - Create a kmem cache. + * @__name: A string which is used in /proc/slabinfo to identify this cache. + * @__object_size: The size of objects to be created in this cache. + * @__args: Optional arguments, see &struct kmem_cache_args. Passing %NULL + * means defaults will be used for all the arguments. + * + * This is currently implemented as a macro using ``_Generic()`` to call + * either the new variant of the function, or a legacy one. + * + * The new variant has 4 parameters: + * ``kmem_cache_create(name, object_size, args, flags)`` + * + * See __kmem_cache_create_args() which implements this. + * + * The legacy variant has 5 parameters: + * ``kmem_cache_create(name, object_size, align, flags, ctor)`` + * + * The align and ctor parameters map to the respective fields of + * &struct kmem_cache_args + * + * Context: Cannot be called within a interrupt, but can be interrupted. + * + * Return: a pointer to the cache on success, NULL on failure. + */ +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) + void kmem_cache_destroy(struct kmem_cache *s); int kmem_cache_shrink(struct kmem_cache *s); @@ -253,20 +440,23 @@ int kmem_cache_shrink(struct kmem_cache *s); * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ -#define KMEM_CACHE(__struct, __flags) \ - kmem_cache_create(#__struct, sizeof(struct __struct), \ - __alignof__(struct __struct), (__flags), NULL) +#define KMEM_CACHE(__struct, __flags) \ + __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ + &(struct kmem_cache_args) { \ + .align = __alignof__(struct __struct), \ + }, (__flags)) /* * To whitelist a single field for copying to/from usercopy, use this * macro instead for KMEM_CACHE() above. */ -#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ - kmem_cache_create_usercopy(#__struct, \ - sizeof(struct __struct), \ - __alignof__(struct __struct), (__flags), \ - offsetof(struct __struct, __field), \ - sizeof_field(struct __struct, __field), NULL) +#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ + __kmem_cache_create_args(#__struct, sizeof(struct __struct), \ + &(struct kmem_cache_args) { \ + .align = __alignof__(struct __struct), \ + .useroffset = offsetof(struct __struct, __field), \ + .usersize = sizeof_field(struct __struct, __field), \ + }, (__flags)) /* * Common kmalloc functions provided by all allocators @@ -280,6 +470,7 @@ void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) +DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T)) /** * ksize - Report actual allocation size of associated object @@ -547,6 +738,35 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; #define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) +/** + * kmem_cache_charge - memcg charge an already allocated slab memory + * @objp: address of the slab object to memcg charge + * @gfpflags: describe the allocation context + * + * kmem_cache_charge allows charging a slab object to the current memcg, + * primarily in cases where charging at allocation time might not be possible + * because the target memcg is not known (i.e. softirq context) + * + * The objp should be pointer returned by the slab allocator functions like + * kmalloc (with __GFP_ACCOUNT in flags) or kmem_cache_alloc. The memcg charge + * behavior can be controlled through gfpflags parameter, which affects how the + * necessary internal metadata can be allocated. Including __GFP_NOFAIL denotes + * that overcharging is requested instead of failure, but is not applied for the + * internal metadata allocation. + * + * There are several cases where it will return true even if the charging was + * not done: + * More specifically: + * + * 1. For !CONFIG_MEMCG or cgroup_disable=memory systems. + * 2. Already charged slab objects. + * 3. For slab objects from KMALLOC_NORMAL caches - allocated by kmalloc() + * without __GFP_ACCOUNT + * 4. Allocating internal metadata has failed + * + * Return: true if charge was successful otherwise false. + */ +bool kmem_cache_charge(void *objp, gfp_t gfpflags); void kmem_cache_free(struct kmem_cache *s, void *objp); kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, @@ -733,6 +953,16 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz * @new_n: new number of elements to alloc * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) + * + * If __GFP_ZERO logic is requested, callers must ensure that, starting with the + * initial memory allocation, every subsequent call to this API for the same + * memory allocation is flagged with __GFP_ZERO. Otherwise, it is possible that + * __GFP_ZERO is not fully honored by this API. + * + * See krealloc_noprof() for further details. + * + * In any case, the contents of the object pointed to are preserved up to the + * lesser of the new and old sizes. */ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, size_t new_n, @@ -841,8 +1071,8 @@ kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) #define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) #define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) - __realloc_size(3); +void *kvrealloc_noprof(const void *p, size_t size, gfp_t flags) + __realloc_size(2); #define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) extern void kvfree(const void *addr); diff --git a/include/linux/smp.h b/include/linux/smp.h index fcd61dfe2af3..f1aa0952e8c3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -109,7 +109,7 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, * Architecture specific boot CPU setup. Defined as empty weak function in * init/main.c. Architectures can override it. */ -void smp_prepare_boot_cpu(void); +void __init smp_prepare_boot_cpu(void); #ifdef CONFIG_SMP @@ -294,4 +294,10 @@ int smpcfd_prepare_cpu(unsigned int cpu); int smpcfd_dead_cpu(unsigned int cpu); int smpcfd_dying_cpu(unsigned int cpu); +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG +bool csd_lock_is_stuck(void); +#else +static inline bool csd_lock_is_stuck(void) { return false; } +#endif + #endif /* __LINUX_SMP_H */ diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h index 56fbe2dc59b1..3e6cf2b25a97 100644 --- a/include/linux/soc/cirrus/ep93xx.h +++ b/include/linux/soc/cirrus/ep93xx.h @@ -2,7 +2,18 @@ #ifndef _SOC_EP93XX_H #define _SOC_EP93XX_H -struct platform_device; +struct regmap; +struct spinlock_t; + +enum ep93xx_soc_model { + EP93XX_9301_SOC, + EP93XX_9307_SOC, + EP93XX_9312_SOC, +}; + +#include <linux/auxiliary_bus.h> +#include <linux/compiler_types.h> +#include <linux/container_of.h> #define EP93XX_CHIP_REV_D0 3 #define EP93XX_CHIP_REV_D1 4 @@ -10,28 +21,18 @@ struct platform_device; #define EP93XX_CHIP_REV_E1 6 #define EP93XX_CHIP_REV_E2 7 -#ifdef CONFIG_ARCH_EP93XX -int ep93xx_pwm_acquire_gpio(struct platform_device *pdev); -void ep93xx_pwm_release_gpio(struct platform_device *pdev); -int ep93xx_ide_acquire_gpio(struct platform_device *pdev); -void ep93xx_ide_release_gpio(struct platform_device *pdev); -int ep93xx_keypad_acquire_gpio(struct platform_device *pdev); -void ep93xx_keypad_release_gpio(struct platform_device *pdev); -int ep93xx_i2s_acquire(void); -void ep93xx_i2s_release(void); -unsigned int ep93xx_chip_revision(void); +struct ep93xx_regmap_adev { + struct auxiliary_device adev; + struct regmap *map; + void __iomem *base; + spinlock_t *lock; + void (*write)(struct regmap *map, spinlock_t *lock, unsigned int reg, + unsigned int val); + void (*update_bits)(struct regmap *map, spinlock_t *lock, + unsigned int reg, unsigned int mask, unsigned int val); +}; -#else -static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {} -static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {} -static inline int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) { return 0; } -static inline void ep93xx_keypad_release_gpio(struct platform_device *pdev) {} -static inline int ep93xx_i2s_acquire(void) { return 0; } -static inline void ep93xx_i2s_release(void) {} -static inline unsigned int ep93xx_chip_revision(void) { return 0; } - -#endif +#define to_ep93xx_regmap_adev(_adev) \ + container_of((_adev), struct ep93xx_regmap_adev, adev) #endif diff --git a/include/linux/soc/mediatek/dvfsrc.h b/include/linux/soc/mediatek/dvfsrc.h new file mode 100644 index 000000000000..1498b3ed396b --- /dev/null +++ b/include/linux/soc/mediatek/dvfsrc.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (c) 2021 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> + */ + +#ifndef __MEDIATEK_DVFSRC_H +#define __MEDIATEK_DVFSRC_H + +enum mtk_dvfsrc_cmd { + MTK_DVFSRC_CMD_BW, + MTK_DVFSRC_CMD_HRT_BW, + MTK_DVFSRC_CMD_PEAK_BW, + MTK_DVFSRC_CMD_OPP, + MTK_DVFSRC_CMD_VCORE_LEVEL, + MTK_DVFSRC_CMD_VSCP_LEVEL, + MTK_DVFSRC_CMD_MAX, +}; + +#if IS_ENABLED(CONFIG_MTK_DVFSRC) + +int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data); +int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data); + +#else + +static inline int mtk_dvfsrc_send_request(const struct device *dev, u32 cmd, u64 data) +{ return -ENODEV; } + +static inline int mtk_dvfsrc_query_info(const struct device *dev, u32 cmd, int *data) +{ return -ENODEV; } + +#endif /* CONFIG_MTK_DVFSRC */ + +#endif diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 6c6cccc848f4..9956e18c5ffa 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -434,6 +434,11 @@ #define MT7622_TOP_AXI_PROT_EN_WB (BIT(2) | BIT(6) | \ BIT(7) | BIT(8)) +#define MT6735_TOP_AXI_PROT_EN_CONN (BIT(2) | BIT(8)) +#define MT6735_TOP_AXI_PROT_EN_MD1 (BIT(24) | BIT(25) | \ + BIT(26) | BIT(27) | \ + BIT(28)) + #define INFRA_TOPAXI_PROTECTEN 0x0220 #define INFRA_TOPAXI_PROTECTSTA1 0x0228 #define INFRA_TOPAXI_PROTECTEN_SET 0x0260 diff --git a/include/linux/soc/mediatek/mtk_sip_svc.h b/include/linux/soc/mediatek/mtk_sip_svc.h index 0761128b4354..abe24a73ee19 100644 --- a/include/linux/soc/mediatek/mtk_sip_svc.h +++ b/include/linux/soc/mediatek/mtk_sip_svc.h @@ -22,6 +22,9 @@ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, MTK_SIP_SMC_CONVENTION, \ ARM_SMCCC_OWNER_SIP, fn_id) +/* DVFSRC SMC calls */ +#define MTK_SIP_DVFSRC_VCOREFS_CONTROL MTK_SIP_SMC_CMD(0x506) + /* IOMMU related SMC call */ #define MTK_SIP_KERNEL_IOMMU_CONTROL MTK_SIP_SMC_CMD(0x514) diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 0f038a1a0330..2996a3c28ef3 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -88,11 +88,15 @@ struct geni_se { #define SE_GENI_M_IRQ_STATUS 0x610 #define SE_GENI_M_IRQ_EN 0x614 #define SE_GENI_M_IRQ_CLEAR 0x618 +#define SE_GENI_M_IRQ_EN_SET 0x61c +#define SE_GENI_M_IRQ_EN_CLEAR 0x620 #define SE_GENI_S_CMD0 0x630 #define SE_GENI_S_CMD_CTRL_REG 0x634 #define SE_GENI_S_IRQ_STATUS 0x640 #define SE_GENI_S_IRQ_EN 0x644 #define SE_GENI_S_IRQ_CLEAR 0x648 +#define SE_GENI_S_IRQ_EN_SET 0x64c +#define SE_GENI_S_IRQ_EN_CLEAR 0x650 #define SE_GENI_TX_FIFOn 0x700 #define SE_GENI_RX_FIFOn 0x780 #define SE_GENI_TX_FIFO_STATUS 0x800 @@ -101,6 +105,8 @@ struct geni_se { #define SE_GENI_RX_WATERMARK_REG 0x810 #define SE_GENI_RX_RFR_WATERMARK_REG 0x814 #define SE_GENI_IOS 0x908 +#define SE_GENI_M_GP_LENGTH 0x910 +#define SE_GENI_S_GP_LENGTH 0x914 #define SE_DMA_TX_IRQ_STAT 0xc40 #define SE_DMA_TX_IRQ_CLR 0xc44 #define SE_DMA_TX_FSM_RST 0xc58 @@ -234,6 +240,9 @@ struct geni_se { #define IO2_DATA_IN BIT(1) #define RX_DATA_IN BIT(0) +/* SE_GENI_M_GP_LENGTH and SE_GENI_S_GP_LENGTH fields */ +#define GP_LENGTH GENMASK(31, 0) + /* SE_DMA_TX_IRQ_STAT Register fields */ #define TX_DMA_DONE BIT(0) #define TX_EOT BIT(1) @@ -249,8 +258,8 @@ struct geni_se { #define RX_DMA_PARITY_ERR BIT(5) #define RX_DMA_BREAK GENMASK(8, 7) #define RX_GENI_GP_IRQ GENMASK(10, 5) -#define RX_GENI_CANCEL_IRQ BIT(11) #define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) +#define RX_GENI_CANCEL_IRQ BIT(14) /* SE_HW_PARAM_0 fields */ #define TX_FIFO_WIDTH_MSK GENMASK(29, 24) diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 9e9f528b1370..8e5d78fb4847 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -54,7 +54,19 @@ #define LLCC_CAMEXP4 52 #define LLCC_DISP_WB 53 #define LLCC_DISP_1 54 +#define LLCC_VIEYE 57 +#define LLCC_VIDPTH 58 +#define LLCC_GPUMV 59 +#define LLCC_EVA_LEFT 60 +#define LLCC_EVA_RIGHT 61 +#define LLCC_EVAGAIN 62 +#define LLCC_VIPTH 63 #define LLCC_VIDVSP 64 +#define LLCC_DISP_LEFT 65 +#define LLCC_DISP_RIGHT 66 +#define LLCC_EVCS_LEFT 67 +#define LLCC_EVCS_RIGHT 68 +#define LLCC_SPAD 69 /** * struct llcc_slice_desc - Cache slice descriptor @@ -125,6 +137,7 @@ struct llcc_edac_reg_offset { * @num_banks: Number of llcc banks * @bitmap: Bit map to track the active slice ids * @ecc_irq: interrupt for llcc cache error detection and reporting + * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation * @version: Indicates the LLCC version */ struct llcc_drv_data { @@ -139,6 +152,7 @@ struct llcc_drv_data { u32 num_banks; unsigned long *bitmap; int ecc_irq; + bool ecc_irq_configured; u32 version; }; diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index bd0d11af76c5..fd104b666836 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -195,6 +195,35 @@ struct ti_sci_clk_ops { u64 *current_freq); }; +/* TISCI LPM IO isolation control values */ +#define TISCI_MSG_VALUE_IO_ENABLE 1 +#define TISCI_MSG_VALUE_IO_DISABLE 0 + +/* TISCI LPM constraint state values */ +#define TISCI_MSG_CONSTRAINT_SET 1 +#define TISCI_MSG_CONSTRAINT_CLR 0 + +/** + * struct ti_sci_pm_ops - Low Power Mode (LPM) control operations + * @lpm_wake_reason: Get the wake up source that woke the SoC from LPM + * - source: The wake up source that woke soc from LPM. + * - timestamp: Timestamp at which soc woke. + * @set_device_constraint: Set LPM constraint on behalf of a device + * - id: Device Identifier + * - state: The desired state of device constraint: set or clear. + * @set_latency_constraint: Set LPM resume latency constraint + * - latency: maximum acceptable latency to wake up from low power mode + * - state: The desired state of latency constraint: set or clear. + */ +struct ti_sci_pm_ops { + int (*lpm_wake_reason)(const struct ti_sci_handle *handle, + u32 *source, u64 *timestamp, u8 *pin, u8 *mode); + int (*set_device_constraint)(const struct ti_sci_handle *handle, + u32 id, u8 state); + int (*set_latency_constraint)(const struct ti_sci_handle *handle, + u16 latency, u8 state); +}; + /** * struct ti_sci_resource_desc - Description of TI SCI resource instance range. * @start: Start index of the first resource range. @@ -539,6 +568,7 @@ struct ti_sci_ops { struct ti_sci_core_ops core_ops; struct ti_sci_dev_ops dev_ops; struct ti_sci_clk_ops clk_ops; + struct ti_sci_pm_ops pm_ops; struct ti_sci_rm_core_ops rm_core_ops; struct ti_sci_rm_irq_ops rm_irq_ops; struct ti_sci_rm_ringacc_ops rm_ring_ops; diff --git a/include/linux/socket.h b/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index fc5a206c4043..3e6c8e9d67ae 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -53,6 +53,8 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, /* Deprecated. * This is unsafe, unless caller checked user provided optlen. * Prefer copy_safe_from_sockptr() instead. + * + * Returns 0 for success, or number of bytes not copied on error. */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { @@ -77,7 +79,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize, { if (optlen < ksize) return -EINVAL; - return copy_from_sockptr(dst, optval, ksize); + if (copy_from_sockptr(dst, optval, ksize)) + return -EFAULT; + return 0; } static inline int copy_struct_from_sockptr(void *dst, size_t ksize, diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 94fc1b57c57b..bd9836690da6 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -4,12 +4,20 @@ #ifndef __SOUNDWIRE_H #define __SOUNDWIRE_H +#include <linux/bitfield.h> #include <linux/bug.h> -#include <linux/lockdep_types.h> +#include <linux/completion.h> +#include <linux/device.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/lockdep_types.h> #include <linux/mod_devicetable.h> -#include <linux/bitfield.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <sound/sdca.h> + +struct dentry; +struct fwnode_handle; struct sdw_bus; struct sdw_slave; @@ -226,64 +234,36 @@ enum sdw_clk_stop_mode { /** * struct sdw_dp0_prop - DP0 properties + * @words: wordlengths supported * @max_word: Maximum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) * @min_word: Minimum number of bits in a Payload Channel Sample, 1 to 64 * (inclusive) * @num_words: number of wordlengths supported - * @words: wordlengths supported + * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @BRA_flow_controlled: Slave implementation results in an OK_NotReady * response * @simple_ch_prep_sm: If channel prepare sequence is required - * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts + * @num_lanes: array size of @lane_list + * @lane_list: indicates which Lanes can be used by DP0 * * The wordlengths are specified by Spec as max, min AND number of * discrete values, implementation can define based on the wordlengths they * support */ struct sdw_dp0_prop { + u32 *words; u32 max_word; u32 min_word; u32 num_words; - u32 *words; + u32 ch_prep_timeout; bool BRA_flow_controlled; bool simple_ch_prep_sm; - u32 ch_prep_timeout; bool imp_def_interrupts; -}; - -/** - * struct sdw_dpn_audio_mode - Audio mode properties for DPn - * @bus_min_freq: Minimum bus frequency, in Hz - * @bus_max_freq: Maximum bus frequency, in Hz - * @bus_num_freq: Number of discrete frequencies supported - * @bus_freq: Discrete bus frequencies, in Hz - * @min_freq: Minimum sampling frequency, in Hz - * @max_freq: Maximum sampling bus frequency, in Hz - * @num_freq: Number of discrete sampling frequency supported - * @freq: Discrete sampling frequencies, in Hz - * @prep_ch_behave: Specifies the dependencies between Channel Prepare - * sequence and bus clock configuration - * If 0, Channel Prepare can happen at any Bus clock rate - * If 1, Channel Prepare sequence shall happen only after Bus clock is - * changed to a frequency supported by this mode or compatible modes - * described by the next field - * @glitchless: Bitmap describing possible glitchless transitions from this - * Audio Mode to other Audio Modes - */ -struct sdw_dpn_audio_mode { - u32 bus_min_freq; - u32 bus_max_freq; - u32 bus_num_freq; - u32 *bus_freq; - u32 max_freq; - u32 min_freq; - u32 num_freq; - u32 *freq; - u32 prep_ch_behave; - u32 glitchless; + int num_lanes; + u32 *lane_list; }; /** @@ -298,24 +278,25 @@ struct sdw_dpn_audio_mode { * @type: Data port type. Full, Simplified or Reduced * @max_grouping: Maximum number of samples that can be grouped together for * a full data port - * @simple_ch_prep_sm: If the port supports simplified channel prepare state - * machine * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @max_ch: Maximum channels supported * @min_ch: Minimum channels supported * @num_channels: Number of discrete channels supported - * @channels: Discrete channels supported * @num_ch_combinations: Number of channel combinations supported + * @channels: Discrete channels supported * @ch_combinations: Channel combinations supported + * @lane_list: indicates which Lanes can be used by DPn + * @num_lanes: array size of @lane_list * @modes: SDW mode supported * @max_async_buffer: Number of samples that this port can buffer in * asynchronous modes + * @port_encoding: Payload Channel Sample encoding schemes supported * @block_pack_mode: Type of block port mode supported * @read_only_wordlength: Read Only wordlength field in DPN_BlockCtrl1 register - * @port_encoding: Payload Channel Sample encoding schemes supported - * @audio_modes: Audio modes supported + * @simple_ch_prep_sm: If the port supports simplified channel prepare state + * machine */ struct sdw_dpn_prop { u32 num; @@ -325,25 +306,29 @@ struct sdw_dpn_prop { u32 *words; enum sdw_dpn_type type; u32 max_grouping; - bool simple_ch_prep_sm; u32 ch_prep_timeout; u32 imp_def_interrupts; u32 max_ch; u32 min_ch; u32 num_channels; - u32 *channels; u32 num_ch_combinations; + u32 *channels; u32 *ch_combinations; + u32 *lane_list; + int num_lanes; u32 modes; u32 max_async_buffer; + u32 port_encoding; bool block_pack_mode; bool read_only_wordlength; - u32 port_encoding; - struct sdw_dpn_audio_mode *audio_modes; + bool simple_ch_prep_sm; }; /** * struct sdw_slave_prop - SoundWire Slave properties + * @dp0_prop: Data Port 0 properties + * @src_dpn_prop: Source Data Port N properties + * @sink_dpn_prop: Sink Data Port N properties * @mipi_revision: Spec version of the implementation * @wake_capable: Wake-up events are supported * @test_mode_capable: If test mode is supported @@ -360,23 +345,26 @@ struct sdw_dpn_prop { * SCP_AddrPage2 * @bank_delay_support: Slave implements bank delay/bridge support registers * SCP_BankDelay and SCP_NextFrame + * @lane_control_support: Slave supports lane control * @p15_behave: Slave behavior when the Master attempts a read to the Port15 * alias - * @lane_control_support: Slave supports lane control * @master_count: Number of Masters present on this Slave * @source_ports: Bitmap identifying source ports * @sink_ports: Bitmap identifying sink ports - * @dp0_prop: Data Port 0 properties - * @src_dpn_prop: Source Data Port N properties - * @sink_dpn_prop: Sink Data Port N properties - * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @sdca_interrupt_register_list: indicates which sets of SDCA interrupt status + * and masks are supported + * @commit_register_supported: is PCP_Commit register supported + * @scp_int1_mask: SCP_INT1_MASK desired settings * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. * @use_domain_irq: call actual IRQ handler on slave, as well as callback */ struct sdw_slave_prop { + struct sdw_dp0_prop *dp0_prop; + struct sdw_dpn_prop *src_dpn_prop; + struct sdw_dpn_prop *sink_dpn_prop; u32 mipi_revision; bool wake_capable; bool test_mode_capable; @@ -388,16 +376,15 @@ struct sdw_slave_prop { bool high_PHY_capable; bool paging_support; bool bank_delay_support; - enum sdw_p15_behave p15_behave; bool lane_control_support; + enum sdw_p15_behave p15_behave; u32 master_count; u32 source_ports; u32 sink_ports; - struct sdw_dp0_prop *dp0_prop; - struct sdw_dpn_prop *src_dpn_prop; - struct sdw_dpn_prop *sink_dpn_prop; - u8 scp_int1_mask; u32 quirks; + u32 sdca_interrupt_register_list; + u8 commit_register_supported; + u8 scp_int1_mask; bool clock_reg_supported; bool use_domain_irq; }; @@ -406,13 +393,14 @@ struct sdw_slave_prop { /** * struct sdw_master_prop - Master properties + * @clk_gears: Clock gears supported + * @clk_freq: Clock frequencies supported, in Hz + * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification * @revision: MIPI spec version of the implementation * @clk_stop_modes: Bitmap, bit N set when clock-stop-modeN supported * @max_clk_freq: Maximum Bus clock frequency, in Hz * @num_clk_gears: Number of clock gears supported - * @clk_gears: Clock gears supported * @num_clk_freq: Number of clock frequencies supported, in Hz - * @clk_freq: Clock frequencies supported, in Hz * @default_frame_rate: Controller default Frame rate, in Hz * @default_row: Number of rows * @default_col: Number of columns @@ -421,24 +409,23 @@ struct sdw_slave_prop { * command * @mclk_freq: clock reference passed to SoundWire Master, in Hz. * @hw_disabled: if true, the Master is not functional, typically due to pin-mux - * @quirks: bitmask identifying optional behavior beyond the scope of the MIPI specification */ struct sdw_master_prop { + u32 *clk_gears; + u32 *clk_freq; + u64 quirks; u32 revision; u32 clk_stop_modes; u32 max_clk_freq; u32 num_clk_gears; - u32 *clk_gears; u32 num_clk_freq; - u32 *clk_freq; u32 default_frame_rate; u32 default_row; u32 default_col; - bool dynamic_frame; u32 err_threshold; u32 mclk_freq; + bool dynamic_frame; bool hw_disabled; - u64 quirks; }; /* Definitions for Master quirks */ @@ -488,9 +475,9 @@ struct sdw_slave_id { __u8 sdw_version:4; }; -struct sdw_extended_slave_id { - int link_id; - struct sdw_slave_id id; +struct sdw_peripherals { + int num_peripherals; + struct sdw_slave *array[]; }; /* @@ -630,7 +617,6 @@ struct sdw_slave_ops { int (*clk_stop)(struct sdw_slave *slave, enum sdw_clk_stop_mode mode, enum sdw_clk_stop_type type); - }; /** @@ -663,6 +649,7 @@ struct sdw_slave_ops { * @is_mockup_device: status flag used to squelch errors in the command/control * protocol for SoundWire mockup devices * @sdw_dev_lock: mutex used to protect callbacks/remove races + * @sdca_data: structure containing all device data for SDCA helpers */ struct sdw_slave { struct sdw_slave_id id; @@ -686,6 +673,7 @@ struct sdw_slave { bool first_interrupt_done; bool is_mockup_device; struct mutex sdw_dev_lock; /* protect callbacks/remove races */ + struct sdca_device_data sdca_data; }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) @@ -704,10 +692,7 @@ struct sdw_master_device { container_of(d, struct sdw_master_device, dev) struct sdw_driver { - const char *name; - - int (*probe)(struct sdw_slave *sdw, - const struct sdw_device_id *id); + int (*probe)(struct sdw_slave *sdw, const struct sdw_device_id *id); int (*remove)(struct sdw_slave *sdw); void (*shutdown)(struct sdw_slave *sdw); @@ -726,7 +711,7 @@ struct sdw_driver { SDW_SLAVE_ENTRY_EXT((_mfg_id), (_part_id), 0, 0, (_drv_data)) int sdw_handle_slave_status(struct sdw_bus *bus, - enum sdw_slave_status status[]); + enum sdw_slave_status status[]); /* * SDW master structures and APIs @@ -808,29 +793,28 @@ struct sdw_enable_ch { */ struct sdw_master_port_ops { int (*dpn_set_port_params)(struct sdw_bus *bus, - struct sdw_port_params *port_params, - unsigned int bank); + struct sdw_port_params *port_params, + unsigned int bank); int (*dpn_set_port_transport_params)(struct sdw_bus *bus, - struct sdw_transport_params *transport_params, - enum sdw_reg_bank bank); - int (*dpn_port_prep)(struct sdw_bus *bus, - struct sdw_prepare_ch *prepare_ch); + struct sdw_transport_params *transport_params, + enum sdw_reg_bank bank); + int (*dpn_port_prep)(struct sdw_bus *bus, struct sdw_prepare_ch *prepare_ch); int (*dpn_port_enable_ch)(struct sdw_bus *bus, - struct sdw_enable_ch *enable_ch, unsigned int bank); + struct sdw_enable_ch *enable_ch, unsigned int bank); }; struct sdw_msg; /** - * struct sdw_defer - SDW deffered message - * @length: message length + * struct sdw_defer - SDW deferred message * @complete: message completion * @msg: SDW message + * @length: message length */ struct sdw_defer { + struct sdw_msg *msg; int length; struct completion complete; - struct sdw_msg *msg; }; /** @@ -851,14 +835,11 @@ struct sdw_defer { */ struct sdw_master_ops { int (*read_prop)(struct sdw_bus *bus); - u64 (*override_adr) - (struct sdw_bus *bus, u64 addr); - enum sdw_command_response (*xfer_msg) - (struct sdw_bus *bus, struct sdw_msg *msg); - enum sdw_command_response (*xfer_msg_defer) - (struct sdw_bus *bus); + u64 (*override_adr)(struct sdw_bus *bus, u64 addr); + enum sdw_command_response (*xfer_msg)(struct sdw_bus *bus, struct sdw_msg *msg); + enum sdw_command_response (*xfer_msg_defer)(struct sdw_bus *bus); int (*set_bus_conf)(struct sdw_bus *bus, - struct sdw_bus_params *params); + struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); int (*post_bank_switch)(struct sdw_bus *bus); u32 (*read_ping_status)(struct sdw_bus *bus); @@ -873,68 +854,71 @@ struct sdw_master_ops { * struct sdw_bus - SoundWire bus * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. * @md: Master device - * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. - * @link_id: Link id number, can be 0 to N, unique for each Controller - * @id: bus system-wide unique id - * @slaves: list of Slaves on this bus - * @assigned: Bitmap for Slave device numbers. - * Bit set implies used number, bit clear implies unused number. + * @bus_lock_key: bus lock key associated to @bus_lock * @bus_lock: bus lock + * @slaves: list of Slaves on this bus + * @msg_lock_key: message lock key associated to @msg_lock * @msg_lock: message lock - * @compute_params: points to Bus resource management implementation - * @ops: Master callback ops - * @port_ops: Master port callback ops - * @params: Current bus parameters - * @prop: Master properties - * @vendor_specific_prop: pointer to non-standard properties * @m_rt_list: List of Master instance of all stream(s) running on Bus. This * is used to compute and program bus bandwidth, clock, frame shape, * transport and port parameters - * @debugfs: Bus debugfs - * @domain: IRQ domain * @defer_msg: Defer message - * @clk_stop_timeout: Clock stop timeout computed - * @bank_switch_timeout: Bank switch timeout computed - * @multi_link: Store bus property that indicates if multi links - * are supported. This flag is populated by drivers after reading - * appropriate firmware (ACPI/DT). + * @params: Current bus parameters + * @stream_refcount: number of streams currently using this bus + * @ops: Master callback ops + * @port_ops: Master port callback ops + * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties * @hw_sync_min_links: Number of links used by a stream above which * hardware-based synchronization is required. This value is only * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. - * @stream_refcount: number of streams currently using this bus + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller + * @id: bus system-wide unique id + * @compute_params: points to Bus resource management implementation + * @assigned: Bitmap for Slave device numbers. + * Bit set implies used number, bit clear implies unused number. + * @clk_stop_timeout: Clock stop timeout computed + * @bank_switch_timeout: Bank switch timeout computed + * @domain: IRQ domain + * @irq_chip: IRQ chip + * @debugfs: Bus debugfs (optional) + * @multi_link: Store bus property that indicates if multi links + * are supported. This flag is populated by drivers after reading + * appropriate firmware (ACPI/DT). */ struct sdw_bus { struct device *dev; struct sdw_master_device *md; - int controller_id; - unsigned int link_id; - int id; - struct list_head slaves; - DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); - struct mutex bus_lock; struct lock_class_key bus_lock_key; - struct mutex msg_lock; + struct mutex bus_lock; + struct list_head slaves; struct lock_class_key msg_lock_key; - int (*compute_params)(struct sdw_bus *bus); + struct mutex msg_lock; + struct list_head m_rt_list; + struct sdw_defer defer_msg; + struct sdw_bus_params params; + int stream_refcount; const struct sdw_master_ops *ops; const struct sdw_master_port_ops *port_ops; - struct sdw_bus_params params; struct sdw_master_prop prop; void *vendor_specific_prop; - struct list_head m_rt_list; + int hw_sync_min_links; + int controller_id; + unsigned int link_id; + int id; + int (*compute_params)(struct sdw_bus *bus); + DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); + unsigned int clk_stop_timeout; + u32 bank_switch_timeout; + struct irq_chip irq_chip; + struct irq_domain *domain; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; #endif - struct irq_chip irq_chip; - struct irq_domain *domain; - struct sdw_defer defer_msg; - unsigned int clk_stop_timeout; - u32 bank_switch_timeout; bool multi_link; - int hw_sync_min_links; - int stream_refcount; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, @@ -1012,18 +996,18 @@ struct sdw_stream_params { * @params: Stream parameters * @state: Current state of the stream * @type: Stream type PCM or PDM + * @m_rt_count: Count of Master runtime(s) in this stream * @master_list: List of Master runtime(s) in this stream. * master_list can contain only one m_rt per Master instance * for a stream - * @m_rt_count: Count of Master runtime(s) in this stream */ struct sdw_stream_runtime { const char *name; struct sdw_stream_params params; enum sdw_stream_state state; enum sdw_stream_type type; - struct list_head master_list; int m_rt_count; + struct list_head master_list; }; struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); @@ -1032,12 +1016,12 @@ void sdw_release_stream(struct sdw_stream_runtime *stream); int sdw_compute_params(struct sdw_bus *bus); int sdw_stream_add_master(struct sdw_bus *bus, - struct sdw_stream_config *stream_config, - const struct sdw_port_config *port_config, - unsigned int num_ports, - struct sdw_stream_runtime *stream); + struct sdw_stream_config *stream_config, + const struct sdw_port_config *port_config, + unsigned int num_ports, + struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, - struct sdw_stream_runtime *stream); + struct sdw_stream_runtime *stream); int sdw_startup_stream(void *sdw_substream); int sdw_prepare_stream(struct sdw_stream_runtime *stream); int sdw_enable_stream(struct sdw_stream_runtime *stream); diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 28a4eb77717f..799f8578137b 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -27,9 +27,11 @@ #define ACP_SDW0 0 #define ACP_SDW1 1 #define AMD_SDW_MAX_MANAGER_COUNT 2 +#define ACP63_PCI_REV_ID 0x63 struct acp_sdw_pdata { u16 instance; + u32 acp_rev; /* mutex to protect acp common register access */ struct mutex *acp_sdw_lock; }; @@ -66,6 +68,7 @@ struct sdw_amd_dai_runtime { * @instance: SoundWire manager instance * @quirks: SoundWire manager quirks * @wake_en_mask: wake enable mask per SoundWire manager + * @acp_rev: acp pci device revision id * @clk_stopped: flag set to true when clock is stopped * @power_mode_mask: flag interprets amd SoundWire manager power mode * @dai_runtime_array: dai runtime array @@ -94,6 +97,7 @@ struct amd_sdw_manager { u32 quirks; u32 wake_en_mask; u32 power_mode_mask; + u32 acp_rev; bool clk_stopped; struct sdw_amd_dai_runtime **dai_runtime_array; @@ -115,25 +119,23 @@ struct sdw_amd_acpi_info { * struct sdw_amd_ctx - context allocated by the controller driver probe * * @count: link count - * @num_slaves: total number of devices exposed across all enabled links * @link_mask: bit-wise mask listing SoundWire links reported by the * Controller - * @ids: array of slave_id, representing Slaves exposed across all enabled - * links * @pdev: platform device structure + * @peripherals: array representing Peripherals exposed across all enabled links */ struct sdw_amd_ctx { int count; - int num_slaves; u32 link_mask; - struct sdw_extended_slave_id *ids; struct platform_device *pdev[AMD_SDW_MAX_MANAGER_COUNT]; + struct sdw_peripherals *peripherals; }; /** * struct sdw_amd_res - Soundwire AMD global resource structure, * typically populated by the DSP driver/Legacy driver * + * @acp_rev: acp pci device revision id * @addr: acp pci device resource start address * @reg_range: ACP register range * @link_mask: bit-wise mask listing links selected by the DSP driver/ @@ -146,6 +148,7 @@ struct sdw_amd_ctx { * @acp_lock: mutex protecting acp common registers access */ struct sdw_amd_res { + u32 acp_rev; u32 addr; u32 reg_range; u32 link_mask; diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index d537587b4499..580086417e4b 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -4,6 +4,7 @@ #ifndef __SDW_INTEL_H #define __SDW_INTEL_H +#include <linux/acpi.h> #include <linux/irqreturn.h> #include <linux/soundwire/sdw.h> @@ -227,7 +228,7 @@ struct sdw_intel_ops { /** * struct sdw_intel_acpi_info - Soundwire Intel information found in ACPI tables * @handle: ACPI controller handle - * @count: link count found with "sdw-master-count" property + * @count: link count found with "sdw-master-count" or "sdw-manager-list" property * @link_mask: bit-wise mask listing links enabled by BIOS menu * * this structure could be expanded to e.g. provide all the _ADR @@ -286,31 +287,28 @@ struct hdac_bus; * hardware capabilities after all power dependencies are settled. * @link_mask: bit-wise mask listing SoundWire links reported by the * Controller - * @num_slaves: total number of devices exposed across all enabled links * @handle: ACPI parent handle * @ldev: information for each link (controller-specific and kept * opaque here) - * @ids: array of slave_id, representing Slaves exposed across all enabled - * links * @link_list: list to handle interrupts across all links * @shim_lock: mutex to handle concurrent rmw access to shared SHIM registers. * @shim_mask: flags to track initialization of SHIM shared registers * @shim_base: sdw shim base. * @alh_base: sdw alh base. + * @peripherals: array representing Peripherals exposed across all enabled links */ struct sdw_intel_ctx { int count; void __iomem *mmio_base; u32 link_mask; - int num_slaves; acpi_handle handle; struct sdw_intel_link_dev **ldev; - struct sdw_extended_slave_id *ids; struct list_head link_list; struct mutex shim_lock; /* lock for access to shared SHIM registers */ u32 shim_mask; u32 shim_base; u32 alh_base; + struct sdw_peripherals *peripherals; }; /** @@ -388,6 +386,7 @@ struct sdw_intel; /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms. * @debugfs_init: initialize all debugfs capabilities * @debugfs_exit: close and cleanup debugfs capabilities + * @get_link_count: fetch link count from hardware registers * @register_dai: read all PDI information and register DAIs * @check_clock_stop: throw error message if clock is not stopped. * @start_bus: normal start @@ -412,6 +411,8 @@ struct sdw_intel_hw_ops { void (*debugfs_init)(struct sdw_intel *sdw); void (*debugfs_exit)(struct sdw_intel *sdw); + int (*get_link_count)(struct sdw_intel *sdw); + int (*register_dai)(struct sdw_intel *sdw); void (*check_clock_stop)(struct sdw_intel *sdw); @@ -447,4 +448,9 @@ extern const struct sdw_intel_hw_ops sdw_intel_lnl_hw_ops; #define SDW_INTEL_DEV_NUM_IDA_MIN 6 +/* + * Max number of links supported in hardware + */ +#define SDW_INTEL_MAX_LINKS 5 + #endif diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index f866d5c8ed32..c46d2b8029be 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -90,6 +90,8 @@ enum spi_mem_data_dir { * @data.buswidth: number of IO lanes used to send/receive the data * @data.dtr: whether the data should be sent in DTR mode or not * @data.ecc: whether error correction is required or not + * @data.swap16: whether the byte order of 16-bit words is swapped when read + * or written in Octal DTR mode compared to STR mode. * @data.dir: direction of the transfer * @data.nbytes: number of data bytes to send/receive. Can be zero if the * operation does not involve transferring data @@ -124,7 +126,8 @@ struct spi_mem_op { u8 buswidth; u8 dtr : 1; u8 ecc : 1; - u8 __pad : 6; + u8 swap16 : 1; + u8 __pad : 5; enum spi_mem_data_dir dir; unsigned int nbytes; union { @@ -297,10 +300,13 @@ struct spi_controller_mem_ops { * struct spi_controller_mem_caps - SPI memory controller capabilities * @dtr: Supports DTR operations * @ecc: Supports operations with error correction + * @swap16: Supports swapping bytes on a 16 bit boundary when configured in + * Octal DTR */ struct spi_controller_mem_caps { bool dtr; bool ecc; + bool swap16; }; #define spi_mem_controller_is_capable(ctlr, cap) \ diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d47d5f14ff99..8497f4747e24 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -498,7 +498,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * controller has native support for memory like operations. * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). - * @slave_abort: abort the ongoing transfer request on an SPI slave controller * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that @@ -725,10 +724,7 @@ struct spi_controller { struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); - union { - int (*slave_abort)(struct spi_controller *ctlr); - int (*target_abort)(struct spi_controller *ctlr); - }; + int (*target_abort)(struct spi_controller *ctlr); /* * These hooks are for drivers that use a generic implementation @@ -802,11 +798,6 @@ static inline void spi_controller_put(struct spi_controller *ctlr) put_device(&ctlr->dev); } -static inline bool spi_controller_is_slave(struct spi_controller *ctlr) -{ - return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; -} - static inline bool spi_controller_is_target(struct spi_controller *ctlr) { return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; @@ -833,21 +824,6 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, extern struct spi_controller *__spi_alloc_controller(struct device *host, unsigned int size, bool slave); -static inline struct spi_controller *spi_alloc_master(struct device *host, - unsigned int size) -{ - return __spi_alloc_controller(host, size, false); -} - -static inline struct spi_controller *spi_alloc_slave(struct device *host, - unsigned int size) -{ - if (!IS_ENABLED(CONFIG_SPI_SLAVE)) - return NULL; - - return __spi_alloc_controller(host, size, true); -} - static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) { @@ -867,21 +843,6 @@ struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); -static inline struct spi_controller *devm_spi_alloc_master(struct device *dev, - unsigned int size) -{ - return __devm_spi_alloc_controller(dev, size, false); -} - -static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, - unsigned int size) -{ - if (!IS_ENABLED(CONFIG_SPI_SLAVE)) - return NULL; - - return __devm_spi_alloc_controller(dev, size, true); -} - static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) { @@ -1296,7 +1257,6 @@ extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -extern int spi_slave_abort(struct spi_device *spi); extern int spi_target_abort(struct spi_device *spi); static inline size_t diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index d4cb83195f7a..c92cd43a47f4 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -24,6 +24,7 @@ struct spi_bitbang { #define BITBANG_CS_ACTIVE 1 /* normally nCS, active low */ #define BITBANG_CS_INACTIVE 0 + void (*set_mosi_idle)(struct spi_device *spi); /* txrx_bufs() may handle dma mapping for transfers that don't * already have one (transfer.{tx,rx}_dma is zero), or use PIO */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 89eb6f4c659c..9ecb0ab504e3 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_API_SMP_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 61c49b16f69a..f6499c37157d 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -16,26 +16,25 @@ static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name, } #endif -#define spin_lock_init(slock) \ +#define __spin_lock_init(slock, name, key, percpu) \ do { \ - static struct lock_class_key __key; \ - \ rt_mutex_base_init(&(slock)->lock); \ - __rt_spin_lock_init(slock, #slock, &__key, false); \ + __rt_spin_lock_init(slock, name, key, percpu); \ } while (0) -#define local_spin_lock_init(slock) \ +#define _spin_lock_init(slock, percpu) \ do { \ static struct lock_class_key __key; \ - \ - rt_mutex_base_init(&(slock)->lock); \ - __rt_spin_lock_init(slock, #slock, &__key, true); \ + __spin_lock_init(slock, #slock, &__key, percpu); \ } while (0) -extern void rt_spin_lock(spinlock_t *lock); -extern void rt_spin_lock_nested(spinlock_t *lock, int subclass); -extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); -extern void rt_spin_unlock(spinlock_t *lock); +#define spin_lock_init(slock) _spin_lock_init(slock, false) +#define local_spin_lock_init(slock) _spin_lock_init(slock, true) + +extern void rt_spin_lock(spinlock_t *lock) __acquires(lock); +extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock); +extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock); +extern void rt_spin_unlock(spinlock_t *lock) __releases(lock); extern void rt_spin_lock_unlock(spinlock_t *lock); extern int rt_spin_trylock_bh(spinlock_t *lock); extern int rt_spin_trylock(spinlock_t *lock); @@ -132,7 +131,7 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, #define spin_trylock_irq(lock) \ __cond_lock(lock, rt_spin_trylock(lock)) -#define __spin_trylock_irqsave(lock, flags) \ +#define spin_trylock_irqsave(lock, flags) \ ({ \ int __locked; \ \ @@ -142,9 +141,6 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, __locked; \ }) -#define spin_trylock_irqsave(lock, flags) \ - __cond_lock(lock, __spin_trylock_irqsave(lock, flags)) - #define spin_is_contended(lock) (((void)(lock), 0)) static inline int spin_is_locked(spinlock_t *lock) diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h index 7f86a2016ac5..fc4e2d017c20 100644 --- a/include/linux/spinlock_types_up.h +++ b/include/linux/spinlock_types_up.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_TYPES_UP_H #ifndef __LINUX_SPINLOCK_TYPES_RAW_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif /* diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index c87204247592..1e84e71ca495 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -2,7 +2,7 @@ #define __LINUX_SPINLOCK_UP_H #ifndef __LINUX_INSIDE_SPINLOCK_H -# error "please don't include this file directly" +# error "Please do not include this file directly." #endif #include <asm/processor.h> /* for cpu_relax() */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 835bbb2d1f88..08339eb8a01c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +#ifdef CONFIG_TINY_SRCU +#define __srcu_read_lock_lite __srcu_read_lock +#define __srcu_read_unlock_lite __srcu_read_unlock +#else // #ifdef CONFIG_TINY_SRCU +int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp); +#endif // #else // #ifdef CONFIG_TINY_SRCU void synchronize_srcu(struct srcu_struct *ssp); #define SRCU_GET_STATE_COMPLETED 0x1 @@ -176,17 +183,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -#define SRCU_NMI_UNKNOWN 0x0 -#define SRCU_NMI_UNSAFE 0x1 -#define SRCU_NMI_SAFE 0x2 - -#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) -void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); -#else -static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, - bool nmi_safe) { } -#endif - /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing @@ -236,33 +232,67 @@ static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * Note that srcu_read_lock() and the matching srcu_read_unlock() must - * occur in the same context, for example, it is illegal to invoke - * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() - * was invoked in process context. + * The return value from srcu_read_lock() must be passed unaltered + * to the matching srcu_read_unlock(). Note that srcu_read_lock() and + * the matching srcu_read_unlock() must occur in the same context, for + * example, it is illegal to invoke srcu_read_unlock() in an irq handler + * if the matching srcu_read_lock() was invoked in process context. Or, + * for that matter to invoke srcu_read_unlock() from one task and the + * matching srcu_read_lock() from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); retval = __srcu_read_lock(ssp); srcu_lock_acquire(&ssp->dep_map); return retval; } /** + * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but for a light-weight + * smp_mb()-free reader. See srcu_read_lock() for more information. + * + * If srcu_read_lock_lite() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. Note that grace-period auto-expediting is disabled for _lite + * srcu_struct structures because auto-expedited grace periods invoke + * synchronize_rcu_expedited(), IPIs and all. + * + * Note that srcu_read_lock_lite() can be invoked only from those contexts + * where RCU is watching, that is, from contexts where it would be legal + * to invoke rcu_read_lock(). Otherwise, lockdep will complain. + */ +static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) +{ + int retval; + + srcu_check_read_flavor_lite(ssp); + retval = __srcu_read_lock_lite(ssp); + rcu_try_lock_acquire(&ssp->dep_map); + return retval; +} + +/** * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section, but in an NMI-safe manner. * See srcu_read_lock() for more information. + * + * If srcu_read_lock_nmisafe() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. */ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_nmi_safety(ssp, true); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI); retval = __srcu_read_lock_nmisafe(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; @@ -274,7 +304,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); retval = __srcu_read_lock(ssp); return retval; } @@ -303,7 +333,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) { WARN_ON_ONCE(in_nmi()); - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); return __srcu_read_lock(ssp); } @@ -318,12 +348,28 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); srcu_lock_release(&ssp->dep_map); __srcu_read_unlock(ssp, idx); } /** + * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit a light-weight SRCU read-side critical section. + */ +static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + srcu_lock_release(&ssp->dep_map); + __srcu_read_unlock_lite(ssp, idx); +} + +/** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). @@ -334,7 +380,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); - srcu_check_nmi_safety(ssp, true); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI); rcu_lock_release(&ssp->dep_map); __srcu_read_unlock_nmisafe(ssp, idx); } @@ -343,7 +389,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); __srcu_read_unlock(ssp, idx); } @@ -360,7 +406,7 @@ static inline void srcu_up_read(struct srcu_struct *ssp, int idx) { WARN_ON_ONCE(idx & ~0x1); WARN_ON_ONCE(in_nmi()); - srcu_check_nmi_safety(ssp, false); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 4d96bbdb45f0..1321da803274 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -81,6 +81,9 @@ static inline void srcu_barrier(struct srcu_struct *ssp) synchronize_srcu(ssp); } +#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) +#define srcu_check_read_flavor_lite(ssp) do { } while (0) + /* Defined here to avoid size increase for non-torture kernels. */ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 8f3f72480e78..490aeecc6bb4 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -25,7 +25,7 @@ struct srcu_data { /* Read-side state. */ atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ - int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */ + int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -43,6 +43,11 @@ struct srcu_data { struct srcu_struct *ssp; }; +/* Values for ->srcu_reader_flavor. */ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). + /* * Node in SRCU combining tree, similar in function to rcu_data. */ @@ -129,10 +134,23 @@ struct srcu_struct { #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 +/* + * Values for initializing gp sequence fields. Higher values allow wrap arounds to + * occur earlier. + * The second value with state is useful in the case of static initialization of + * srcu_usage where srcu_gp_seq_needed is expected to have some state value in its + * lower bits (or else it will appear to be already initialized within + * the call check_init_srcu_struct()). + */ +#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT) +#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1) + #define __SRCU_USAGE_INIT(name) \ { \ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ - .srcu_gp_seq_needed = -1UL, \ + .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \ + .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \ + .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \ .work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0), \ } @@ -191,4 +209,64 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Returns an index that must be passed to the matching + * srcu_read_unlock_lite(). + * + * Note that this_cpu_inc() is an RCU read-side critical section either + * because it disables interrupts, because it is a single instruction, + * or because it is a read-modify-write atomic operation, depending on + * the whims of the architecture. + */ +static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) +{ + int idx; + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */ + barrier(); /* Avoid leaking the critical section. */ + return idx; +} + +/* + * Removes the count for the old reader from the appropriate + * per-CPU element of the srcu_struct. Note that this may well be a + * different CPU than that which was incremented by the corresponding + * srcu_read_lock_lite(), but it must be within the same task. + * + * Note that this_cpu_inc() is an RCU read-side critical section either + * because it disables interrupts, because it is a single instruction, + * or because it is a read-modify-write atomic operation, depending on + * the whims of the architecture. + */ +static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) +{ + barrier(); /* Avoid leaking the critical section. */ + this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); +} + +void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); + +// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels. +static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) +{ + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + + if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) + return; + + // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. + __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); +} + +// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels. +static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) +{ + if (IS_ENABLED(CONFIG_PROVE_RCU)) + __srcu_check_read_flavor(ssp, read_flavor); +} + #endif diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index e9ec32fb97d4..2cc21ffcdaf9 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -147,7 +147,7 @@ static inline int stack_depot_early_init(void) { return 0; } * If the provided stack trace comes from the interrupt context, only the part * up to the interrupt entry is saved. * - * Context: Any context, but setting STACK_DEPOT_FLAG_CAN_ALLOC is required if + * Context: Any context, but unsetting STACK_DEPOT_FLAG_CAN_ALLOC is required if * alloc_pages() cannot be used from the current context. Currently * this is the case for contexts where neither %GFP_ATOMIC nor * %GFP_NOWAIT can be used (NMI, raw_spin_lock). @@ -156,7 +156,7 @@ static inline int stack_depot_early_init(void) { return 0; } */ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, unsigned int nr_entries, - gfp_t gfp_flags, + gfp_t alloc_flags, depot_flags_t depot_flags); /** @@ -175,7 +175,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, * Return: Handle of the stack trace stored in depot, 0 on failure */ depot_stack_handle_t stack_depot_save(unsigned long *entries, - unsigned int nr_entries, gfp_t gfp_flags); + unsigned int nr_entries, gfp_t alloc_flags); /** * __stack_depot_get_stack_record - Get a pointer to a stack_record struct diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 84e13bd5df28..d79ff252cfdc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -100,6 +100,7 @@ struct stmmac_dma_cfg { bool eame; bool multi_msi_en; bool dche; + bool atds; }; #define AXI_BLEN 7 @@ -137,33 +138,6 @@ struct stmmac_txq_cfg { int tbs_en; }; -/* FPE link state */ -enum stmmac_fpe_state { - FPE_STATE_OFF = 0, - FPE_STATE_CAPABLE = 1, - FPE_STATE_ENTERING_ON = 2, - FPE_STATE_ON = 3, -}; - -/* FPE link-partner hand-shaking mPacket type */ -enum stmmac_mpacket_type { - MPACKET_VERIFY = 0, - MPACKET_RESPONSE = 1, -}; - -enum stmmac_fpe_task_state_t { - __FPE_REMOVING, - __FPE_TASK_SCHED, -}; - -struct stmmac_fpe_cfg { - bool enable; /* FPE enable */ - bool hs_enable; /* FPE handshake enable */ - enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ - enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ - u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ -}; - struct stmmac_safety_feature_cfg { u32 tsoee; u32 mrxpee; @@ -231,7 +205,6 @@ struct plat_stmmacenet_data { struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; - struct stmmac_fpe_cfg *fpe_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; int has_gmac; diff --git a/include/linux/string.h b/include/linux/string.h index 9edace076ddb..493ac4862c77 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -76,12 +76,16 @@ ssize_t sized_strscpy(char *, const char *, size_t); * known size. */ #define __strscpy0(dst, src, ...) \ - sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) -#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst) + \ + __must_be_cstr(dst) + __must_be_cstr(src)) +#define __strscpy1(dst, src, size) \ + sized_strscpy(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src)) #define __strscpy_pad0(dst, src, ...) \ - sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) -#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst) + \ + __must_be_cstr(dst) + __must_be_cstr(src)) +#define __strscpy_pad1(dst, src, size) \ + sized_strscpy_pad(dst, src, size + __must_be_cstr(dst) + __must_be_cstr(src)) /** * strscpy - Copy a C-string into a sized buffer @@ -279,6 +283,18 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *str, char old, char new); +/** + * mem_is_zero - Check if an area of memory is all 0's. + * @s: The memory area + * @n: The size of the area + * + * Return: True if the area of memory is all 0's. + */ +static inline bool mem_is_zero(const void *s, size_t n) +{ + return !memchr_inv(s, 0, n); +} + extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; @@ -319,7 +335,6 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); -int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index d9ebe20229f8..120ca0f28e95 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -2,17 +2,32 @@ #ifndef _LINUX_STRING_CHOICES_H_ #define _LINUX_STRING_CHOICES_H_ +/* + * Here provide a series of helpers in the str_$TRUE_$FALSE format (you can + * also expand some helpers as needed), where $TRUE and $FALSE are their + * corresponding literal strings. These helpers can be used in the printing + * and also in other places where constant strings are required. Using these + * helpers offers the following benefits: + * 1) Reducing the hardcoding of strings, which makes the code more elegant + * through these simple literal-meaning helpers. + * 2) Unifying the output, which prevents the same string from being printed + * in various forms, such as enable/disable, enabled/disabled, en/dis. + * 3) Deduping by the linker, which results in a smaller binary file. + */ + #include <linux/types.h> static inline const char *str_enable_disable(bool v) { return v ? "enable" : "disable"; } +#define str_disable_enable(v) str_enable_disable(!(v)) static inline const char *str_enabled_disabled(bool v) { return v ? "enabled" : "disabled"; } +#define str_disabled_enabled(v) str_enabled_disabled(!(v)) static inline const char *str_hi_lo(bool v) { @@ -36,11 +51,25 @@ static inline const char *str_on_off(bool v) { return v ? "on" : "off"; } +#define str_off_on(v) str_on_off(!(v)) static inline const char *str_yes_no(bool v) { return v ? "yes" : "no"; } +#define str_no_yes(v) str_yes_no(!(v)) + +static inline const char *str_up_down(bool v) +{ + return v ? "up" : "down"; +} +#define str_down_up(v) str_up_down(!(v)) + +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) /** * str_plural - Return the simple pluralization based on English counts diff --git a/include/linux/sungem_phy.h b/include/linux/sungem_phy.h index c505f30e8b68..eecc7eb63bfb 100644 --- a/include/linux/sungem_phy.h +++ b/include/linux/sungem_phy.h @@ -40,7 +40,7 @@ enum { /* An instance of a PHY, partially borrowed from mii_if_info */ struct mii_phy { - struct mii_phy_def* def; + const struct mii_phy_def *def; u32 advertising; int mii_id; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 0c77ba488bba..fec1e8a1570c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -151,13 +151,15 @@ struct rpc_task_setup { #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) #define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) -#define RPC_TASK_RUNNING 0 -#define RPC_TASK_QUEUED 1 -#define RPC_TASK_ACTIVE 2 -#define RPC_TASK_NEED_XMIT 3 -#define RPC_TASK_NEED_RECV 4 -#define RPC_TASK_MSG_PIN_WAIT 5 -#define RPC_TASK_SIGNALLED 6 +enum { + RPC_TASK_RUNNING, + RPC_TASK_QUEUED, + RPC_TASK_ACTIVE, + RPC_TASK_NEED_XMIT, + RPC_TASK_NEED_RECV, + RPC_TASK_MSG_PIN_WAIT, + RPC_TASK_SIGNALLED, +}; #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a7d0406b9ef5..e68fecf6eab5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -21,6 +21,7 @@ #include <linux/wait.h> #include <linux/mm.h> #include <linux/pagevec.h> +#include <linux/kthread.h> /* * @@ -33,9 +34,9 @@ * node traffic on multi-node NUMA NFS servers. */ struct svc_pool { - unsigned int sp_id; /* pool id; also node id on NUMA */ + unsigned int sp_id; /* pool id; also node id on NUMA */ struct lwq sp_xprts; /* pending transports */ - atomic_t sp_nrthreads; /* # of threads in pool */ + unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct llist_head sp_idle_threads; /* idle server threads */ @@ -66,9 +67,10 @@ enum { * We currently do not support more than one RPC program per daemon. */ struct svc_serv { - struct svc_program * sv_program; /* RPC program */ + struct svc_program * sv_programs; /* RPC programs */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ unsigned int sv_maxconn; /* max connections allowed or * '0' causing max to be based @@ -232,6 +234,11 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + + int rq_err; /* Thread sets this to inidicate + * initialisation success. + */ + unsigned long bc_to_initval; unsigned int bc_to_retries; void ** rq_lease_breaker; /* The v4 client breaking a lease */ @@ -305,6 +312,31 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) return test_bit(RQ_VICTIM, &rqstp->rq_flags); } +/** + * svc_thread_init_status - report whether thread has initialised successfully + * @rqstp: the thread in question + * @err: errno code + * + * After performing any initialisation that could fail, and before starting + * normal work, each sunrpc svc_thread must call svc_thread_init_status() + * with an appropriate error, or zero. + * + * If zero is passed, the thread is ready and must continue until + * svc_thread_should_stop() returns true. If a non-zero error is passed + * the call will not return - the thread will exit. + */ +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) +{ + rqstp->rq_err = err; + /* memory barrier ensures assignment to error above is visible before + * waitqueue_active() test below completes. + */ + smp_mb(); + wake_up_var(&rqstp->rq_err); + if (err) + kthread_exit(1); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; @@ -329,10 +361,9 @@ struct svc_process_info { }; /* - * List of RPC programs on the same transport endpoint + * RPC program - an array of these can use the same transport endpoint */ struct svc_program { - struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* highest version */ @@ -401,19 +432,16 @@ struct svc_procedure { */ int sunrpc_set_pool_mode(const char *val); int sunrpc_get_pool_mode(char *val, size_t size); -int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, int (*threadfn)(void *data)); -struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, - struct svc_pool *pool, int node); bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page); void svc_rqst_release_pages(struct svc_rqst *rqstp); -void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *prog, + unsigned int nprog, struct svc_stat *stats, unsigned int bufsize, int (*threadfn)(void *data)); @@ -446,11 +474,6 @@ int svc_generic_rpcbind_set(struct net *net, u32 version, int family, unsigned short proto, unsigned short port); -int svc_rpcbind_set_version(struct net *net, - const struct svc_program *progp, - u32 version, int family, - unsigned short proto, - unsigned short port); #define RPC_MAX_ADDRBUFLEN (63U) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index d33bab33099a..619fc0bd837a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,6 +48,7 @@ #include <linux/sunrpc/rpc_rdma.h> #include <linux/sunrpc/rpc_rdma_cid.h> #include <linux/sunrpc/svc_rdma_pcl.h> +#include <linux/sunrpc/rdma_rn.h> #include <linux/percpu_counter.h> #include <rdma/ib_verbs.h> @@ -76,6 +77,7 @@ struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ + struct rpcrdma_notification sc_rn; /* removal notification */ int sc_ord; /* RDMA read limit */ int sc_max_send_sges; bool sc_snd_w_inv; /* OK to use Send With Invalidate */ diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 61c455f1e1f5..2e111153f7cd 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -14,6 +14,7 @@ #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/clnt.h> #include <linux/hash.h> #include <linux/stringhash.h> #include <linux/cred.h> @@ -151,13 +152,16 @@ struct auth_ops { struct svc_xprt; -extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp); extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); +extern void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt, + const struct cred *, + struct svc_cred *); + extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 7c78ec6356b9..bf45d9e8492a 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -58,8 +58,6 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk) */ void svc_recv(struct svc_rqst *rqstp); void svc_send(struct svc_rqst *rqstp); -void svc_drop(struct svc_rqst *); -void svc_sock_update_bufs(struct svc_serv *serv); int svc_addsock(struct svc_serv *serv, struct net *net, const int fd, char *name_return, const size_t len, const struct cred *cred); diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 2f8dc47f1eb0..a2ab813a9800 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -13,7 +13,7 @@ #include <linux/uio.h> #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/scatterlist.h> struct bio_vec; @@ -681,6 +681,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) } /** + * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = *p; + return 0; +} + +/** * xdr_stream_decode_u64 - Decode a 64-bit integer * @xdr: pointer to xdr_stream * @ptr: location to store 64-bit integer diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h new file mode 100644 index 000000000000..66ca3ece951a --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__BUILTINS_H_ +#define _SUNRPC_XDRGEN__BUILTINS_H_ + +#include <linux/sunrpc/xdr.h> + +static inline bool +xdrgen_decode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_encode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_decode_bool(struct xdr_stream *xdr, bool *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = (*p != xdr_zero); + return true; +} + +static inline bool +xdrgen_encode_bool(struct xdr_stream *xdr, bool val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = val ? xdr_one : xdr_zero; + return true; +} + +static inline bool +xdrgen_decode_int(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_int(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_int(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_int(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_long(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_long(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_long(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_long(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_hyper(struct xdr_stream *xdr, s64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_hyper(struct xdr_stream *xdr, s64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_unsigned_hyper(struct xdr_stream *xdr, u64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_hyper(struct xdr_stream *xdr, u64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (unsigned char *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_string(struct xdr_stream *xdr, string val, u32 maxlen) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +static inline bool +xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (u8 *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_opaque(struct xdr_stream *xdr, opaque val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +#endif /* _SUNRPC_XDRGEN__BUILTINS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h new file mode 100644 index 000000000000..20c7270aa64d --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__DEFS_H_ +#define _SUNRPC_XDRGEN__DEFS_H_ + +#define TRUE (true) +#define FALSE (false) + +typedef struct { + u32 len; + unsigned char *data; +} string; + +typedef struct { + u32 len; + u8 *data; +} opaque; + +#define XDR_void (0) +#define XDR_bool (1) +#define XDR_int (1) +#define XDR_unsigned_int (1) +#define XDR_long (1) +#define XDR_unsigned_long (1) +#define XDR_hyper (2) +#define XDR_unsigned_hyper (2) + +#endif /* _SUNRPC_XDRGEN__DEFS_H_ */ diff --git a/include/linux/swap.h b/include/linux/swap.h index ba7ea95d1c57..f3e0ac20c2e8 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -243,22 +243,24 @@ enum { * free clusters are organized into a list. We fetch an entry from the list to * get a free cluster. * - * The data field stores next cluster if the cluster is free or cluster usage - * counter otherwise. The flags field determines if a cluster is free. This is - * protected by swap_info_struct.lock. + * The flags field determines if a cluster is free. This is + * protected by cluster lock. */ struct swap_cluster_info { spinlock_t lock; /* * Protect swap_cluster_info fields - * and swap_info_struct->swap_map - * elements correspond to the swap - * cluster + * other than list, and swap_info_struct->swap_map + * elements corresponding to the swap cluster. */ - unsigned int data:24; - unsigned int flags:8; + u16 count; + u8 flags; + u8 order; + struct list_head list; }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ -#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ +#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ +#define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ +#define CLUSTER_FLAG_FULL 8 /* This cluster is on full list */ /* * The first page in the swap file is the swap header, which is always marked @@ -283,11 +285,6 @@ struct percpu_cluster { unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; -struct swap_cluster_list { - struct swap_cluster_info head; - struct swap_cluster_info tail; -}; - /* * The in-memory structure used to track swap areas. */ @@ -299,8 +296,15 @@ struct swap_info_struct { signed char type; /* strange name for an index */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + unsigned long *zeromap; /* kvmalloc'ed bitmap to track zero pages */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ - struct swap_cluster_list free_clusters; /* free clusters list */ + struct list_head free_clusters; /* free clusters list */ + struct list_head full_clusters; /* full clusters list */ + struct list_head nonfull_clusters[SWAP_NR_ORDERS]; + /* list of cluster that contains at least one free slot */ + struct list_head frag_clusters[SWAP_NR_ORDERS]; + /* list of cluster that are fragmented or contented */ + unsigned int frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ @@ -331,7 +335,8 @@ struct swap_info_struct { * list. */ struct work_struct discard_work; /* discard worker */ - struct swap_cluster_list discard_clusters; /* discard clusters list */ + struct work_struct reclaim_work; /* reclaim worker */ + struct list_head discard_clusters; /* discard clusters list */ struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. @@ -478,9 +483,9 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); -extern void swap_shmem_alloc(swp_entry_t); +extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); -extern int swapcache_prepare(swp_entry_t); +extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); @@ -545,7 +550,7 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) return 0; } -static inline void swap_shmem_alloc(swp_entry_t swp) +static inline void swap_shmem_alloc(swp_entry_t swp, int nr) { } @@ -554,7 +559,7 @@ static inline int swap_duplicate(swp_entry_t swp) return 0; } -static inline int swapcache_prepare(swp_entry_t swp) +static inline int swapcache_prepare(swp_entry_t swp, int nr) { return 0; } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index cb468e418ea1..96f26e29fefe 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -426,9 +426,19 @@ typedef unsigned long pte_marker; * "Poisoned" here is meant in the very general sense of "future accesses are * invalid", instead of referring very specifically to hardware memory errors. * This marker is meant to represent any of various different causes of this. + * + * Note that, when encountered by the faulting logic, PTEs with this marker will + * result in VM_FAULT_HWPOISON and thus regardless trigger hardware memory error + * logic. */ #define PTE_MARKER_POISONED BIT(1) -#define PTE_MARKER_MASK (BIT(2) - 1) +/* + * Indicates that, on fault, this PTE will case a SIGSEGV signal to be + * sent. This means guard markers behave in effect as if the region were mapped + * PROT_NONE, rather than if they were a memory hole or equivalent. + */ +#define PTE_MARKER_GUARD BIT(2) +#define PTE_MARKER_MASK (BIT(3) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { @@ -464,6 +474,18 @@ static inline int is_poisoned_swp_entry(swp_entry_t entry) { return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_POISONED); + +} + +static inline swp_entry_t make_guard_swp_entry(void) +{ + return make_pte_marker_entry(PTE_MARKER_GUARD); +} + +static inline int is_guard_swp_entry(swp_entry_t entry) +{ + return is_pte_marker_entry(entry) && + (pte_marker_get(entry) & PTE_MARKER_GUARD); } /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 4bcf6754738d..c6333204d451 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -77,6 +77,7 @@ struct cachestat_range; struct cachestat; struct statmount; struct mnt_id_req; +struct xattr_args; #include <linux/types.h> #include <linux/aio_abi.h> @@ -338,23 +339,35 @@ asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); asmlinkage long sys_setxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); +asmlinkage long sys_setxattrat(int dfd, const char __user *path, unsigned int at_flags, + const char __user *name, + const struct xattr_args __user *args, size_t size); asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_getxattr(const char __user *path, const char __user *name, void __user *value, size_t size); +asmlinkage long sys_getxattrat(int dfd, const char __user *path, unsigned int at_flags, + const char __user *name, + struct xattr_args __user *args, size_t size); asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size); asmlinkage long sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size); asmlinkage long sys_listxattr(const char __user *path, char __user *list, size_t size); +asmlinkage long sys_listxattrat(int dfd, const char __user *path, + unsigned int at_flags, + char __user *list, size_t size); asmlinkage long sys_llistxattr(const char __user *path, char __user *list, size_t size); asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size); asmlinkage long sys_removexattr(const char __user *path, const char __user *name); +asmlinkage long sys_removexattrat(int dfd, const char __user *path, + unsigned int at_flags, + const char __user *name); asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); @@ -870,7 +883,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, #endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, - int __user *mnt_id, int flag); + void __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index aa4c6d44aaa0..40a6ac6c9713 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -90,9 +90,7 @@ int proc_do_static_key(const struct ctl_table *table, int write, void *buffer, /* * Register a set of sysctl names by calling register_sysctl - * with an initialised array of struct ctl_table's. An entry with - * NULL procname terminates the table. table->de will be - * set up by the registration and need not be initialised in advance. + * with an initialised array of struct ctl_table's. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. @@ -133,7 +131,7 @@ static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) /* A sysctl table is an array of struct ctl_table: */ struct ctl_table { - const char *procname; /* Text ID for /proc/sys, or zero */ + const char *procname; /* Text ID for /proc/sys */ void *data; int maxlen; umode_t mode; @@ -162,7 +160,7 @@ struct ctl_node { struct ctl_table_header { union { struct { - struct ctl_table *ctl_table; + const struct ctl_table *ctl_table; int ctl_table_size; int used; int count; @@ -223,13 +221,13 @@ extern void retire_sysctl_set(struct ctl_table_set *set); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, - const char *path, struct ctl_table *table, size_t table_size); -struct ctl_table_header *register_sysctl_sz(const char *path, struct ctl_table *table, + const char *path, const struct ctl_table *table, size_t table_size); +struct ctl_table_header *register_sysctl_sz(const char *path, const struct ctl_table *table, size_t table_size); void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init_bases(void); -extern void __register_sysctl_init(const char *path, struct ctl_table *table, +extern void __register_sysctl_init(const char *path, const struct ctl_table *table, const char *table_name, size_t table_size); #define register_sysctl_init(path, table) \ __register_sysctl_init(path, table, #table, ARRAY_SIZE(table)) @@ -251,7 +249,7 @@ extern int no_unaligned_warning; #else /* CONFIG_SYSCTL */ -static inline void register_sysctl_init(const char *path, struct ctl_table *table) +static inline void register_sysctl_init(const char *path, const struct ctl_table *table) { } @@ -261,7 +259,7 @@ static inline struct ctl_table_header *register_sysctl_mount_point(const char *p } static inline struct ctl_table_header *register_sysctl_sz(const char *path, - struct ctl_table *table, + const struct ctl_table *table, size_t table_size) { return NULL; diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index bef5f06a91de..07cbab516942 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -60,12 +60,19 @@ struct efifb_dmi_info { void sysfb_disable(struct device *dev); +bool sysfb_handles_screen_info(void); + #else /* CONFIG_SYSFB */ static inline void sysfb_disable(struct device *dev) { } +static inline bool sysfb_handles_screen_info(void) +{ + return false; +} + #endif /* CONFIG_SYSFB */ #ifdef CONFIG_EFI diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index c4e64dc11206..0f2fcd244523 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -87,6 +87,11 @@ do { \ * SYSFS_GROUP_VISIBLE() when assigning this callback to * specify separate _group_visible() and _attr_visible() * handlers. + * @bin_size: + * Optional: Function to return the size of a binary attribute + * of the group. Will be called repeatedly for each binary + * attribute in the group. Overwrites the size field embedded + * inside the attribute itself. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -96,9 +101,15 @@ struct attribute_group { umode_t (*is_visible)(struct kobject *, struct attribute *, int); umode_t (*is_bin_visible)(struct kobject *, - struct bin_attribute *, int); + const struct bin_attribute *, int); + size_t (*bin_size)(struct kobject *, + const struct bin_attribute *, + int); struct attribute **attrs; - struct bin_attribute **bin_attrs; + union { + struct bin_attribute **bin_attrs; + const struct bin_attribute *const *bin_attrs_new; + }; }; #define SYSFS_PREALLOC 010000 @@ -191,22 +202,22 @@ struct attribute_group { * attributes, the group visibility is determined by the function * specified to is_visible() not is_bin_visible() */ -#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ - static inline umode_t sysfs_group_visible_##name( \ - struct kobject *kobj, struct bin_attribute *attr, int n) \ - { \ - if (n == 0 && !name##_group_visible(kobj)) \ - return SYSFS_GROUP_INVISIBLE; \ - return name##_attr_visible(kobj, attr, n); \ +#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, const struct bin_attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ } -#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ - static inline umode_t sysfs_group_visible_##name( \ - struct kobject *kobj, struct bin_attribute *a, int n) \ - { \ - if (n == 0 && !name##_group_visible(kobj)) \ - return SYSFS_GROUP_INVISIBLE; \ - return a->mode; \ +#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, const struct bin_attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ } #define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn @@ -297,11 +308,15 @@ struct bin_attribute { struct address_space *(*f_mapping)(void); ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); + ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, + char *, loff_t, size_t); ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, char *, loff_t, size_t); - loff_t (*llseek)(struct file *, struct kobject *, struct bin_attribute *, + ssize_t (*write_new)(struct file *, struct kobject *, + const struct bin_attribute *, char *, loff_t, size_t); + loff_t (*llseek)(struct file *, struct kobject *, const struct bin_attribute *, loff_t, int); - int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr, + int (*mmap)(struct file *, struct kobject *, const struct bin_attribute *attr, struct vm_area_struct *vma); }; @@ -317,25 +332,36 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) +typedef ssize_t __sysfs_bin_rw_handler_new(struct file *, struct kobject *, + const struct bin_attribute *, char *, loff_t, size_t); + /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ - .read = _read, \ - .write = _write, \ + .read = _Generic(_read, \ + __sysfs_bin_rw_handler_new * : NULL, \ + default : _read \ + ), \ + .read_new = _Generic(_read, \ + __sysfs_bin_rw_handler_new * : _read, \ + default : NULL \ + ), \ + .write = _Generic(_write, \ + __sysfs_bin_rw_handler_new * : NULL, \ + default : _write \ + ), \ + .write_new = _Generic(_write, \ + __sysfs_bin_rw_handler_new * : _write, \ + default : NULL \ + ), \ .size = _size, \ } -#define __BIN_ATTR_RO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .read = _name##_read, \ - .size = _size, \ -} +#define __BIN_ATTR_RO(_name, _size) \ + __BIN_ATTR(_name, 0444, _name##_read, NULL, _size) -#define __BIN_ATTR_WO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0200 }, \ - .write = _name##_write, \ - .size = _size, \ -} +#define __BIN_ATTR_WO(_name, _size) \ + __BIN_ATTR(_name, 0200, NULL, _name##_write, _size) #define __BIN_ATTR_RW(_name, _size) \ __BIN_ATTR(_name, 0644, _name##_read, _name##_write, _size) @@ -356,11 +382,8 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size) struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) -#define __BIN_ATTR_ADMIN_RO(_name, _size) { \ - .attr = { .name = __stringify(_name), .mode = 0400 }, \ - .read = _name##_read, \ - .size = _size, \ -} +#define __BIN_ATTR_ADMIN_RO(_name, _size) \ + __BIN_ATTR(_name, 0400, _name##_read, NULL, _size) #define __BIN_ATTR_ADMIN_RW(_name, _size) \ __BIN_ATTR(_name, 0600, _name##_read, _name##_write, _size) @@ -371,10 +394,8 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size) #define BIN_ATTR_ADMIN_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size) -#define __BIN_ATTR_SIMPLE_RO(_name, _mode) { \ - .attr = { .name = __stringify(_name), .mode = _mode }, \ - .read = sysfs_bin_attr_simple_read, \ -} +#define __BIN_ATTR_SIMPLE_RO(_name, _mode) \ + __BIN_ATTR(_name, _mode, sysfs_bin_attr_simple_read, NULL, 0) #define BIN_ATTR_SIMPLE_RO(_name) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0444) diff --git a/include/linux/task_work.h b/include/linux/task_work.h index cf5e7e891a77..2964171856e0 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func) } enum task_work_notify_mode { - TWA_NONE, + TWA_NONE = 0, TWA_RESUME, TWA_SIGNAL, TWA_SIGNAL_NO_IPI, TWA_NMI_CURRENT, + + TWA_FLAGS = 0xff00, + TWAF_NO_ALLOC = 0x0100, }; static inline bool task_work_pending(struct task_struct *task) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6a5e08b937b3..f88daaa76d83 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -200,7 +200,6 @@ struct tcp_sock { /* TX read-mostly hotpath cache lines */ __cacheline_group_begin(tcp_sock_read_tx); - /* timestamp of last sent data packet (for restart window) */ u32 max_window; /* Maximal window ever seen from peer */ u32 rcv_ssthresh; /* Current window clamp */ u32 reordering; /* Packet reordering metric. */ @@ -263,7 +262,7 @@ struct tcp_sock { u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 pushed_seq; /* Last pushed seq, required to talk to windows */ - u32 lsndtime; + u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 mdev_us; /* medium deviation */ u32 rtt_seq; /* sequence number to update rttvar */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */ diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h index efd16ed52315..a38494d6b5f4 100644 --- a/include/linux/tee_core.h +++ b/include/linux/tee_core.h @@ -155,6 +155,18 @@ int tee_device_register(struct tee_device *teedev); void tee_device_unregister(struct tee_device *teedev); /** + * tee_device_set_dev_groups() - Set device attribute groups + * @teedev: Device to register + * @dev_groups: Attribute groups + * + * Assigns the provided @dev_groups to the @teedev to be registered later + * with tee_device_register(). Calling this function is optional, but if + * it's called it must be called before tee_device_register(). + */ +void tee_device_set_dev_groups(struct tee_device *teedev, + const struct attribute_group **dev_groups); + +/** * tee_session_calc_client_uuid() - Calculates client UUID for session * @uuid: Resulting UUID * @connection_method: Connection method for session (TEE_IOCTL_LOGIN_*) diff --git a/include/linux/text-patching.h b/include/linux/text-patching.h new file mode 100644 index 000000000000..ad5877ab0855 --- /dev/null +++ b/include/linux/text-patching.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_TEXT_PATCHING_H +#define _LINUX_TEXT_PATCHING_H + +#include <asm/text-patching.h> + +#ifndef text_poke_copy +static inline void *text_poke_copy(void *dst, const void *src, size_t len) +{ + return memcpy(dst, src, len); +} +#define text_poke_copy text_poke_copy +#endif + +#endif /* _LINUX_TEXT_PATCHING_H */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index b86ddca46b9e..754802478b96 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -56,6 +56,9 @@ enum thermal_notify_event { THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ + THERMAL_TZ_ADD_THRESHOLD, /* Threshold added */ + THERMAL_TZ_DEL_THRESHOLD, /* Threshold deleted */ + THERMAL_TZ_FLUSH_THRESHOLDS, /* All thresholds deleted */ }; /** @@ -85,11 +88,17 @@ struct thermal_trip { struct thermal_zone_device; +struct cooling_spec { + unsigned long upper; /* Highest cooling state */ + unsigned long lower; /* Lowest cooling state */ + unsigned int weight; /* Cooling device weight */ +}; + struct thermal_zone_device_ops { - int (*bind) (struct thermal_zone_device *, - struct thermal_cooling_device *); - int (*unbind) (struct thermal_zone_device *, - struct thermal_cooling_device *); + bool (*should_bind) (struct thermal_zone_device *, + const struct thermal_trip *, + struct thermal_cooling_device *, + struct cooling_spec *); int (*get_temp) (struct thermal_zone_device *, int *); int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, @@ -131,6 +140,9 @@ struct thermal_cooling_device { #endif }; +DEFINE_GUARD(cooling_dev, struct thermal_cooling_device *, mutex_lock(&_T->lock), + mutex_unlock(&_T->lock)) + /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { const char *governor_name; @@ -203,15 +215,12 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, } #endif -int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, - struct thermal_trip *trip); int for_each_thermal_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); int thermal_zone_for_each_trip(struct thermal_zone_device *tz, int (*cb)(struct thermal_trip *, void *), void *data); -int thermal_zone_get_num_trips(struct thermal_zone_device *tz); void thermal_zone_set_trip_temp(struct thermal_zone_device *tz, struct thermal_trip *trip, int temp); @@ -240,20 +249,6 @@ const char *thermal_zone_device_type(struct thermal_zone_device *tzd); int thermal_zone_device_id(struct thermal_zone_device *tzd); struct device *thermal_zone_device(struct thermal_zone_device *tzd); -int thermal_bind_cdev_to_trip(struct thermal_zone_device *tz, - const struct thermal_trip *trip, - struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower, - unsigned int weight); -int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, - struct thermal_cooling_device *, - unsigned long, unsigned long, - unsigned int); -int thermal_unbind_cdev_from_trip(struct thermal_zone_device *tz, - const struct thermal_trip *trip, - struct thermal_cooling_device *cdev); -int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, - struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9ea0b28068f4..cf2446c9c30d 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -59,6 +59,14 @@ enum syscall_work_bit { #include <asm/thread_info.h> +#ifndef TIF_NEED_RESCHED_LAZY +#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY +#error Inconsistent PREEMPT_LAZY +#endif +#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED +#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED +#endif + #ifdef __KERNEL__ #ifndef arch_set_restart_data @@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti #ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H -static __always_inline bool tif_need_resched(void) +static __always_inline bool tif_test_bit(int bit) { - return arch_test_bit(TIF_NEED_RESCHED, + return arch_test_bit(bit, (unsigned long *)(¤t_thread_info()->flags)); } #else -static __always_inline bool tif_need_resched(void) +static __always_inline bool tif_test_bit(int bit) { - return test_bit(TIF_NEED_RESCHED, + return test_bit(bit, (unsigned long *)(¤t_thread_info()->flags)); } #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */ +static __always_inline bool tif_need_resched(void) +{ + return tif_test_bit(TIF_NEED_RESCHED); +} + #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, diff --git a/include/linux/tick.h b/include/linux/tick.h index 72744638c5b0..b8ddc8e631a3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -20,12 +20,10 @@ extern void __init tick_init(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); -extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } -static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) @@ -251,12 +249,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk, if (tick_nohz_full_enabled()) tick_nohz_dep_set_task(tsk, bit); } + static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { if (tick_nohz_full_enabled()) tick_nohz_dep_clear_task(tsk, bit); } + +static inline void tick_dep_init_task(struct task_struct *tsk) +{ + atomic_set(&tsk->tick_dep_mask, 0); +} + static inline void tick_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { @@ -290,6 +295,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit) { } +static inline void tick_dep_init_task(struct task_struct *tsk) { } static inline void tick_dep_set_signal(struct task_struct *tsk, enum tick_dep_bits bit) { } static inline void tick_dep_clear_signal(struct signal_struct *signal, diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 84ff2844df2a..e39d4d563b19 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -26,7 +26,7 @@ * occupies a single 64byte cache line. * * The struct is separate from struct timekeeper as it is also used - * for a fast NMI safe accessors. + * for the fast NMI safe accessors. * * @base_real is for the fast NMI safe accessor to allow reading clock * realtime from any context. @@ -44,36 +44,41 @@ struct tk_read_base { /** * struct timekeeper - Structure holding internal timekeeping values. - * @tkr_mono: The readout base structure for CLOCK_MONOTONIC - * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW - * @xtime_sec: Current CLOCK_REALTIME time in seconds - * @ktime_sec: Current CLOCK_MONOTONIC time in seconds - * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset - * @offs_real: Offset clock monotonic -> clock realtime - * @offs_boot: Offset clock monotonic -> clock boottime - * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds - * @clock_was_set_seq: The sequence number of clock was set events - * @cs_was_changed_seq: The sequence number of clocksource change events - * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second - * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds - * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset - * @cycle_interval: Number of clock cycles in one NTP interval - * @xtime_interval: Number of clock shifted nano seconds in one NTP - * interval. - * @xtime_remainder: Shifted nano seconds left over when rounding - * @cycle_interval - * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. - * @ntp_error: Difference between accumulated time and NTP time in ntp - * shifted nano seconds. - * @ntp_error_shift: Shift conversion between clock shifted nano seconds and - * ntp shifted nano seconds. - * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) - * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) - * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC + * @xtime_sec: Current CLOCK_REALTIME time in seconds + * @ktime_sec: Current CLOCK_MONOTONIC time in seconds + * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset + * @offs_real: Offset clock monotonic -> clock realtime + * @offs_boot: Offset clock monotonic -> clock boottime + * @offs_tai: Offset clock monotonic -> clock tai + * @tai_offset: The current UTC to TAI offset in seconds + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds + * @clock_was_set_seq: The sequence number of clock was set events + * @cs_was_changed_seq: The sequence number of clocksource change events + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset + * @cycle_interval: Number of clock cycles in one NTP interval + * @xtime_interval: Number of clock shifted nano seconds in one NTP + * interval. + * @xtime_remainder: Shifted nano seconds left over when rounding + * @cycle_interval + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. + * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second + * @ntp_tick: The ntp_tick_length() value currently being + * used. This cached copy ensures we consistently + * apply the tick length for an entire tick, as + * ntp_tick_length may change mid-tick, and we don't + * want to apply that new value to the tick in + * progress. + * @ntp_error: Difference between accumulated time and NTP time in ntp + * shifted nano seconds. + * @ntp_error_shift: Shift conversion between clock shifted nano seconds and + * ntp shifted nano seconds. + * @ntp_err_mult: Multiplication factor for scaled math conversion + * @skip_second_overflow: Flag used to avoid updating NTP twice with same second * * Note: For timespec(64) based interfaces wall_to_monotonic is what - * we need to add to xtime (or xtime corrected for sub jiffie times) + * we need to add to xtime (or xtime corrected for sub jiffy times) * to get to monotonic time. Monotonic is pegged at zero at system * boot time, so wall_to_monotonic will be negative, however, we will * ALWAYS keep the tv_nsec part positive so we can use the usual @@ -88,10 +93,28 @@ struct tk_read_base { * * @monotonic_to_boottime is a timespec64 representation of @offs_boot to * accelerate the VDSO update for CLOCK_BOOTTIME. + * + * The cacheline ordering of the structure is optimized for in kernel usage of + * the ktime_get() and ktime_get_ts64() family of time accessors. Struct + * timekeeper is prepended in the core timekeeping code with a sequence count, + * which results in the following cacheline layout: + * + * 0: seqcount, tkr_mono + * 1: xtime_sec ... tai_offset + * 2: tkr_raw, raw_sec + * 3,4: Internal variables + * + * Cacheline 0,1 contain the data which is used for accessing + * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the + * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal + * variables which are only accessed during timekeeper updates once per + * tick. */ struct timekeeper { + /* Cacheline 0 (together with prepended seqcount of timekeeper core): */ struct tk_read_base tkr_mono; - struct tk_read_base tkr_raw; + + /* Cacheline 1: */ u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; @@ -99,43 +122,28 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; + + /* Cacheline 2: */ + struct tk_read_base tkr_raw; + u64 raw_sec; + + /* Cachline 3 and 4 (timekeeping internal variables): */ unsigned int clock_was_set_seq; u8 cs_was_changed_seq; - ktime_t next_leap_ktime; - u64 raw_sec; + struct timespec64 monotonic_to_boot; - /* The following members are for timekeeping internal use */ u64 cycle_interval; u64 xtime_interval; s64 xtime_remainder; u64 raw_interval; - /* The ntp_tick_length() value currently being used. - * This cached copy ensures we consistently apply the tick - * length for an entire tick, as ntp_tick_length may change - * mid-tick, and we don't want to apply that new value to - * the tick in progress. - */ + + ktime_t next_leap_ktime; u64 ntp_tick; - /* Difference between accumulated time and NTP time in ntp - * shifted nano seconds. */ s64 ntp_error; u32 ntp_error_shift; u32 ntp_err_mult; - /* Flag used to avoid updating NTP twice with same second */ u32 skip_second_overflow; -#ifdef CONFIG_DEBUG_TIMEKEEPING - long last_warning; - /* - * These simple flag variables are managed - * without locks, which is racy, but they are - * ok since we don't really care about being - * super precise about how many events were - * seen, just that a problem was observed. - */ - int underflow_seen; - int overflow_seen; -#endif }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index fc12a9ba2c88..0e035f675efe 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -45,6 +45,11 @@ extern void ktime_get_real_ts64(struct timespec64 *tv); extern void ktime_get_coarse_ts64(struct timespec64 *ts); extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); +/* Multigrain timestamp interfaces */ +extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); +extern void ktime_get_real_ts64_mg(struct timespec64 *ts); +extern unsigned long timekeeping_get_mg_floor_swaps(void); + void getboottime64(struct timespec64 *ts); /* @@ -275,6 +280,7 @@ struct ktime_timestamps { * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time + * @boot: Boot time * @raw: Monotonic raw system time * @cs_id: Clocksource ID * @clock_was_set_seq: The sequence number of clock-was-set events @@ -283,6 +289,7 @@ struct ktime_timestamps { struct system_time_snapshot { u64 cycles; ktime_t real; + ktime_t boot; ktime_t raw; enum clocksource_ids cs_id; unsigned int clock_was_set_seq; diff --git a/include/linux/timex.h b/include/linux/timex.h index 3871b06bd302..4ee32eff3f22 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -139,14 +139,6 @@ unsigned long random_get_entropy_fallback(void); #define MAXSEC 2048 /* max interval between updates (s) */ #define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ -/* - * kernel variables - * Note: maximum error = NTP sync distance = dispersion + delay / 2; - * estimated error = NTP dispersion. - */ -extern unsigned long tick_usec; /* USER_HZ period (usec) */ -extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ - /* Required to safely shift negative values */ #define shift_right(x, s) ({ \ __typeof__(x) __x = (x); \ diff --git a/include/linux/tpm.h b/include/linux/tpm.h index e93ee8d936a9..20a40ade8030 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -421,6 +421,7 @@ void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value); u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset); u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset); u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset); +void tpm_buf_append_handle(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle); /* * Check if TPM device is in the firmware upgrade mode. @@ -505,6 +506,8 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, u8 *passphrase, int passphraselen); +void tpm_buf_append_auth(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, int passphraselen); static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, @@ -537,7 +540,7 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc); void tpm2_end_auth_session(struct tpm_chip *chip); #else -#include <asm/unaligned.h> +#include <linux/unaligned.h> static inline int tpm2_start_auth_session(struct tpm_chip *chip) { diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 7d68a5cc5881..891368e82558 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -157,7 +157,7 @@ struct tcg_algorithm_info { * Return: size of the event on success, 0 on failure */ -static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, +static __always_inline u32 __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header, bool do_mapping) { diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 42bedcddd511..2a5df5b62cfc 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -184,7 +184,7 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, - TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED_LAZY = 0x02, TRACE_FLAG_NEED_RESCHED = 0x04, TRACE_FLAG_HARDIRQ = 0x08, TRACE_FLAG_SOFTIRQ = 0x10, @@ -193,7 +193,6 @@ enum trace_flag_type { TRACE_FLAG_BH_OFF = 0x80, }; -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) { unsigned int irq_status = irqs_disabled_flags(irqflags) ? @@ -207,17 +206,6 @@ static inline unsigned int tracing_gen_ctx(void) local_save_flags(irqflags); return tracing_gen_ctx_flags(irqflags); } -#else - -static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) -{ - return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -} -static inline unsigned int tracing_gen_ctx(void) -{ - return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); -} -#endif static inline unsigned int tracing_gen_ctx_dec(void) { @@ -326,7 +314,6 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, void trace_event_buffer_commit(struct trace_event_buffer *fbuffer); enum { - TRACE_EVENT_FL_FILTERED_BIT, TRACE_EVENT_FL_CAP_ANY_BIT, TRACE_EVENT_FL_NO_SET_FILTER_BIT, TRACE_EVENT_FL_IGNORE_ENABLE_BIT, @@ -341,7 +328,6 @@ enum { /* * Event flags: - * FILTERED - The event has a filter attached * CAP_ANY - Any user can enable for perf * NO_SET_FILTER - Set when filter has error and is to be ignored * IGNORE_ENABLE - For trace internal events, do not enable with debugfs file @@ -356,7 +342,6 @@ enum { * to a tracepoint yet, then it is cleared when it is. */ enum { - TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), TRACE_EVENT_FL_CAP_ANY = (1 << TRACE_EVENT_FL_CAP_ANY_BIT), TRACE_EVENT_FL_NO_SET_FILTER = (1 << TRACE_EVENT_FL_NO_SET_FILTER_BIT), TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), @@ -381,7 +366,6 @@ struct trace_event_call { }; struct trace_event event; char *print_fmt; - struct event_filter *filter; /* * Static events can disappear with modules, * where as dynamic ones need their own ref count. diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 4dc4955f0fbf..aebf0571c736 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -29,16 +29,22 @@ struct tracepoint_func { int prio; }; +struct tracepoint_ext { + int (*regfunc)(void); + void (*unregfunc)(void); + /* Flags. */ + unsigned int faultable:1; +}; + struct tracepoint { const char *name; /* Tracepoint name */ - struct static_key key; + struct static_key_false key; struct static_call_key *static_call_key; void *static_call_tramp; void *iterator; void *probestub; - int (*regfunc)(void); - void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; + struct tracepoint_ext *ext; }; #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS @@ -83,7 +89,7 @@ struct bpf_raw_event_map { #ifdef CONFIG_TRACEPOINTS # define tracepoint_enabled(tp) \ - static_key_false(&(__tracepoint_##tp).key) + static_branch_unlikely(&(__tracepoint_##tp).key) #else # define tracepoint_enabled(tracepoint) false #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6be396bb4297..76d9055b2cff 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> +#include <linux/rcupdate_trace.h> #include <linux/tracepoint-defs.h> #include <linux/static_call.h> @@ -32,8 +33,6 @@ struct trace_eval_map { #define TRACEPOINT_DEFAULT_PRIO 10 -extern struct srcu_struct tracepoint_srcu; - extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int @@ -64,6 +63,13 @@ struct tp_module { bool trace_module_has_bad_taint(struct module *mod); extern int register_tracepoint_module_notifier(struct notifier_block *nb); extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv); +void for_each_tracepoint_in_module(struct module *, + void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv); #else static inline bool trace_module_has_bad_taint(struct module *mod) { @@ -79,22 +85,49 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) { return 0; } +static inline +void for_each_module_tracepoint(void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv) +{ +} +static inline +void for_each_tracepoint_in_module(struct module *mod, + void (*fct)(struct tracepoint *, + struct module *, void *), + void *priv) +{ +} #endif /* CONFIG_MODULES */ /* * tracepoint_synchronize_unregister must be called between the last tracepoint * probe unregistration and the end of module exit to make sure there is no * caller executing a probe when it is freed. + * + * An alternative is to use the following for batch reclaim associated + * with a given tracepoint: + * + * - tracepoint_is_faultable() == false: call_rcu() + * - tracepoint_is_faultable() == true: call_rcu_tasks_trace() */ #ifdef CONFIG_TRACEPOINTS static inline void tracepoint_synchronize_unregister(void) { - synchronize_srcu(&tracepoint_srcu); + synchronize_rcu_tasks_trace(); synchronize_rcu(); } +static inline bool tracepoint_is_faultable(struct tracepoint *tp) +{ + return tp->ext && tp->ext->faultable; +} #else static inline void tracepoint_synchronize_unregister(void) { } +static inline bool tracepoint_is_faultable(struct tracepoint *tp) +{ + return false; +} #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS @@ -177,65 +210,16 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #endif /* CONFIG_HAVE_STATIC_CALL */ /* - * ARCH_WANTS_NO_INSTR archs are expected to have sanitized entry and idle - * code that disallow any/all tracing/instrumentation when RCU isn't watching. + * Declare an exported function that Rust code can call to trigger this + * tracepoint. This function does not include the static branch; that is done + * in Rust to avoid a function call when the tracepoint is disabled. */ -#ifdef CONFIG_ARCH_WANTS_NO_INSTR -#define RCUIDLE_COND(rcuidle) (rcuidle) -#else -/* srcu can't be used from NMI */ -#define RCUIDLE_COND(rcuidle) (rcuidle && in_nmi()) -#endif - -/* - * it_func[0] is never NULL because there is at least one element in the array - * when the array itself is non NULL. - */ -#define __DO_TRACE(name, args, cond, rcuidle) \ - do { \ - int __maybe_unused __idx = 0; \ - \ - if (!(cond)) \ - return; \ - \ - if (WARN_ONCE(RCUIDLE_COND(rcuidle), \ - "Bad RCU usage for tracepoint")) \ - return; \ - \ - /* keep srcu and sched-rcu usage consistent */ \ - preempt_disable_notrace(); \ - \ - /* \ - * For rcuidle callers, use srcu since sched-rcu \ - * doesn't work from the idle path. \ - */ \ - if (rcuidle) { \ - __idx = srcu_read_lock_notrace(&tracepoint_srcu);\ - ct_irq_enter_irqson(); \ - } \ - \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - \ - if (rcuidle) { \ - ct_irq_exit_irqson(); \ - srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\ - } \ - \ - preempt_enable_notrace(); \ - } while (0) - -#ifndef MODULE -#define __DECLARE_TRACE_RCU(name, proto, args, cond) \ - static inline void trace_##name##_rcuidle(proto) \ +#define DEFINE_RUST_DO_TRACE(name, proto, args) +#define __DEFINE_RUST_DO_TRACE(name, proto, args) \ + notrace void rust_do_trace_##name(proto) \ { \ - if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond), 1); \ + __rust_do_trace_##name(args); \ } -#else -#define __DECLARE_TRACE_RCU(name, proto, args, cond) -#endif /* * Make sure the alignment of the structure in the __tracepoints section will @@ -248,23 +232,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * site if it is not watching, as it will need to be active when the * tracepoint is enabled. */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ +#define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ extern int __traceiter_##name(data_proto); \ DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \ extern struct tracepoint __tracepoint_##name; \ - static inline void trace_##name(proto) \ - { \ - if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(name, \ - TP_ARGS(args), \ - TP_CONDITION(cond), 0); \ - if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - WARN_ONCE(!rcu_is_watching(), \ - "RCU not watching for tracepoint"); \ - } \ - } \ - __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ - PARAMS(cond)) \ + extern void rust_do_trace_##name(proto); \ static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ @@ -291,15 +263,61 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline bool \ trace_##name##_enabled(void) \ { \ - return static_key_false(&__tracepoint_##name.key); \ + return static_branch_unlikely(&__tracepoint_##name.key);\ + } + +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ + static inline void __rust_do_trace_##name(proto) \ + { \ + if (cond) { \ + guard(preempt_notrace)(); \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ + } \ + } \ + static inline void trace_##name(proto) \ + { \ + if (static_branch_unlikely(&__tracepoint_##name.key)) { \ + if (cond) { \ + guard(preempt_notrace)(); \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ + } \ + } \ + if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ + } \ + } + +#define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ + static inline void __rust_do_trace_##name(proto) \ + { \ + guard(rcu_tasks_trace)(); \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ + } \ + static inline void trace_##name(proto) \ + { \ + might_fault(); \ + if (static_branch_unlikely(&__tracepoint_##name.key)) { \ + guard(rcu_tasks_trace)(); \ + __DO_TRACE_CALL(name, TP_ARGS(args)); \ + } \ + if (IS_ENABLED(CONFIG_LOCKDEP)) { \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ + } \ } /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration * on the tracepoints. + * + * it_func[0] is never NULL because there is at least one element in the array + * when the array itself is non NULL. */ -#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ +#define __DEFINE_TRACE_EXT(_name, _ext, proto, args) \ static const char __tpstrtab_##_name[] \ __section("__tracepoints_strings") = #_name; \ extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ @@ -308,14 +326,14 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct tracepoint __tracepoint_##_name __used \ __section("__tracepoints") = { \ .name = __tpstrtab_##_name, \ - .key = STATIC_KEY_INIT_FALSE, \ + .key = STATIC_KEY_FALSE_INIT, \ .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ .iterator = &__traceiter_##_name, \ .probestub = &__probestub_##_name, \ - .regfunc = _reg, \ - .unregfunc = _unreg, \ - .funcs = NULL }; \ + .funcs = NULL, \ + .ext = _ext, \ + }; \ __TRACEPOINT_ENTRY(_name); \ int __traceiter_##_name(void *__data, proto) \ { \ @@ -336,10 +354,27 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) void __probestub_##_name(void *__data, proto) \ { \ } \ - DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); + DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \ + DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args)) -#define DEFINE_TRACE(name, proto, args) \ - DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); +#define DEFINE_TRACE_FN(_name, _reg, _unreg, _proto, _args) \ + static struct tracepoint_ext __tracepoint_ext_##_name = { \ + .regfunc = _reg, \ + .unregfunc = _unreg, \ + .faultable = false, \ + }; \ + __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); + +#define DEFINE_TRACE_SYSCALL(_name, _reg, _unreg, _proto, _args) \ + static struct tracepoint_ext __tracepoint_ext_##_name = { \ + .regfunc = _reg, \ + .unregfunc = _unreg, \ + .faultable = true, \ + }; \ + __DEFINE_TRACE_EXT(_name, &__tracepoint_ext_##_name, PARAMS(_proto), PARAMS(_args)); + +#define DEFINE_TRACE(_name, _proto, _args) \ + __DEFINE_TRACE_EXT(_name, NULL, PARAMS(_proto), PARAMS(_args)); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name); \ @@ -352,11 +387,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #else /* !TRACEPOINTS_ENABLED */ -#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ +#define __DECLARE_TRACE_COMMON(name, proto, args, data_proto) \ static inline void trace_##name(proto) \ { } \ - static inline void trace_##name##_rcuidle(proto) \ - { } \ static inline int \ register_trace_##name(void (*probe)(data_proto), \ void *data) \ @@ -378,7 +411,14 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) return false; \ } +#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) + +#define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ + __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) + #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) +#define DEFINE_TRACE_SYSCALL(name, reg, unreg, proto, args) #define DEFINE_TRACE(name, proto, args) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) @@ -439,6 +479,10 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) +#define DECLARE_TRACE_SYSCALL(name, proto, args) \ + __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ + PARAMS(void *__data, proto)) + #define TRACE_EVENT_FLAGS(event, flag) #define TRACE_EVENT_PERF_PERM(event, expr...) @@ -576,6 +620,9 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) struct, assign, print) \ DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) +#define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ + print, reg, unreg) \ + DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/linux/types.h b/include/linux/types.h index 2bc8766ba20c..2d7b9ae8714c 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -115,8 +115,9 @@ typedef u64 u_int64_t; typedef s64 int64_t; #endif -/* this is a special 64bit data type that is 8-byte aligned */ +/* These are the special 64-bit data types that are 8-byte aligned */ #define aligned_u64 __aligned_u64 +#define aligned_s64 __aligned_s64 #define aligned_be64 __aligned_be64 #define aligned_le64 __aligned_le64 diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d8e4105a2f21..e9c702c1908d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -33,6 +33,14 @@ }) #endif +#ifdef masked_user_access_begin + #define can_do_masked_user_access() 1 +#else + #define can_do_masked_user_access() 0 + #define masked_user_access_begin(src) NULL + #define mask_user_address(src) (src) +#endif + /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) * and get rid of their private instances of copy_{to,from}_user() and @@ -152,19 +160,27 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); - if (!should_fail_usercopy() && likely(access_ok(from, n))) { + if (should_fail_usercopy()) + goto fail; + if (can_do_masked_user_access()) + from = mask_user_address(from); + else { + if (!access_ok(from, n)) + goto fail; /* * Ensure that bad access_ok() speculation will not * lead to nasty side effects *after* the copy is * finished: */ barrier_nospec(); - instrument_copy_from_user_before(to, from, n); - res = raw_copy_from_user(to, from, n); - instrument_copy_from_user_after(to, from, n, res); } - if (unlikely(res)) - memset(to + (n - res), 0, res); + instrument_copy_from_user_before(to, from, n); + res = raw_copy_from_user(to, from, n); + instrument_copy_from_user_after(to, from, n, res); + if (likely(!res)) + return 0; +fail: + memset(to + (n - res), 0, res); return res; } extern __must_check unsigned long @@ -387,6 +403,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } +/** + * copy_struct_to_user: copy a struct to userspace + * @dst: Destination address, in userspace. This buffer must be @ksize + * bytes long. + * @usize: (Alleged) size of @dst struct. + * @src: Source address, in kernel space. + * @ksize: Size of @src struct. + * @ignored_trailing: Set to %true if there was a non-zero byte in @src that + * userspace cannot see because they are using an smaller struct. + * + * Copies a struct from kernel space to userspace, in a way that guarantees + * backwards-compatibility for struct syscall arguments (as long as future + * struct extensions are made such that all new fields are *appended* to the + * old struct, and zeroed-out new fields have the same meaning as the old + * struct). + * + * Some syscalls may wish to make sure that userspace knows about everything in + * the struct, and if there is a non-zero value that userspce doesn't know + * about, they want to return an error (such as -EMSGSIZE) or have some other + * fallback (such as adding a "you're missing some information" flag). If + * @ignored_trailing is non-%NULL, it will be set to %true if there was a + * non-zero byte that could not be copied to userspace (ie. was past @usize). + * + * While unconditionally returning an error in this case is the simplest + * solution, for maximum backward compatibility you should try to only return + * -EMSGSIZE if the user explicitly requested the data that couldn't be copied. + * Note that structure sizes can change due to header changes and simple + * recompilations without code changes(!), so if you care about + * @ignored_trailing you probably want to make sure that any new field data is + * associated with a flag. Otherwise you might assume that a program knows + * about data it does not. + * + * @ksize is just sizeof(*src), and @usize should've been passed by userspace. + * The recommended usage is something like the following: + * + * SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize) + * { + * int err; + * bool ignored_trailing; + * struct foo karg = {}; + * + * if (usize > PAGE_SIZE) + * return -E2BIG; + * if (usize < FOO_SIZE_VER0) + * return -EINVAL; + * + * // ... modify karg somehow ... + * + * err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg), + * &ignored_trailing); + * if (err) + * return err; + * if (ignored_trailing) + * return -EMSGSIZE: + * + * // ... + * } + * + * There are three cases to consider: + * * If @usize == @ksize, then it's copied verbatim. + * * If @usize < @ksize, then the kernel is trying to pass userspace a newer + * struct than it supports. Thus we only copy the interoperable portions + * (@usize) and ignore the rest (but @ignored_trailing is set to %true if + * any of the trailing (@ksize - @usize) bytes are non-zero). + * * If @usize > @ksize, then the kernel is trying to pass userspace an older + * struct than userspace supports. In order to make sure the + * unknown-to-the-kernel fields don't contain garbage values, we zero the + * trailing (@usize - @ksize) bytes. + * + * Returns (in all cases, some data may have been copied): + * * -EFAULT: access to userspace failed. + */ +static __always_inline __must_check int +copy_struct_to_user(void __user *dst, size_t usize, const void *src, + size_t ksize, bool *ignored_trailing) +{ + size_t size = min(ksize, usize); + size_t rest = max(ksize, usize) - size; + + /* Double check if ksize is larger than a known object size. */ + if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1))) + return -E2BIG; + + /* Deal with trailing bytes. */ + if (usize > ksize) { + if (clear_user(dst + size, rest)) + return -EFAULT; + } + if (ignored_trailing) + *ignored_trailing = ksize < usize && + memchr_inv(src + size, 0, rest) != NULL; + /* Copy the interoperable parts of the struct. */ + if (copy_to_user(dst, src, size)) + return -EFAULT; + return 0; +} + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h index bff7445498de..d8219cbe09ff 100644 --- a/include/linux/ubsan.h +++ b/include/linux/ubsan.h @@ -4,6 +4,11 @@ #ifdef CONFIG_UBSAN_TRAP const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); +#else +static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +{ + return NULL; +} #endif #endif diff --git a/include/linux/udp.h b/include/linux/udp.h index 3eb3f2b9a2a0..0807e21cfec9 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -56,6 +56,12 @@ struct udp_sock { int pending; /* Any pending frames ? */ __u8 encap_type; /* Is this an Encapsulation socket? */ +#if !IS_ENABLED(CONFIG_BASE_SMALL) + /* For UDP 4-tuple hash */ + __u16 udp_lrpa_hash; + struct hlist_nulls_node udp_lrpa_node; +#endif + /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -206,6 +212,11 @@ static inline void udp_allow_gso(struct sock *sk) #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) +#if !IS_ENABLED(CONFIG_BASE_SMALL) +#define udp_lrpa_for_each_entry_rcu(__up, node, list) \ + hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node) +#endif + #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) #endif /* _LINUX_UDP_H */ diff --git a/include/linux/uio.h b/include/linux/uio.h index 7020adedfa08..853f9de5aa05 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,6 +11,7 @@ #include <uapi/linux/uio.h> struct page; +struct folio_queue; typedef unsigned int __bitwise iov_iter_extraction_t; @@ -25,6 +26,7 @@ enum iter_type { ITER_IOVEC, ITER_BVEC, ITER_KVEC, + ITER_FOLIOQ, ITER_XARRAY, ITER_DISCARD, }; @@ -66,6 +68,7 @@ struct iov_iter { const struct iovec *__iov; const struct kvec *kvec; const struct bio_vec *bvec; + const struct folio_queue *folioq; struct xarray *xarray; void __user *ubuf; }; @@ -74,6 +77,7 @@ struct iov_iter { }; union { unsigned long nr_segs; + u8 folioq_slot; loff_t xarray_start; }; }; @@ -126,6 +130,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i) return iov_iter_type(i) == ITER_DISCARD; } +static inline bool iov_iter_is_folioq(const struct iov_iter *i) +{ + return iov_iter_type(i) == ITER_FOLIOQ; +} + static inline bool iov_iter_is_xarray(const struct iov_iter *i) { return iov_iter_type(i) == ITER_XARRAY; @@ -180,6 +189,12 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, return copy_page_to_iter(&folio->page, offset, bytes, i); } +static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i) +{ + return copy_page_from_iter(&folio->page, offset, bytes, i); +} + static inline size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, size_t bytes, struct iov_iter *i) { @@ -273,6 +288,9 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec, unsigned long nr_segs, size_t count); void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); +void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction, + const struct folio_queue *folioq, + unsigned int first_slot, unsigned int offset, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, diff --git a/include/linux/unaligned.h b/include/linux/unaligned.h new file mode 100644 index 000000000000..4a9651017e3c --- /dev/null +++ b/include/linux/unaligned.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_UNALIGNED_H +#define __LINUX_UNALIGNED_H + +/* + * This is the most generic implementation of unaligned accesses + * and should work almost anywhere. + */ +#include <linux/unaligned/packed_struct.h> +#include <asm/byteorder.h> +#include <vdso/unaligned.h> + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) +#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) + +static inline u16 get_unaligned_le16(const void *p) +{ + return le16_to_cpu(__get_unaligned_t(__le16, p)); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpu(__get_unaligned_t(__le32, p)); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return le64_to_cpu(__get_unaligned_t(__le64, p)); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_t(__le16, cpu_to_le16(val), p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_t(__le32, cpu_to_le32(val), p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_t(__le64, cpu_to_le64(val), p); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return be16_to_cpu(__get_unaligned_t(__be16, p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return be32_to_cpu(__get_unaligned_t(__be32, p)); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return be64_to_cpu(__get_unaligned_t(__be64, p)); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_t(__be16, cpu_to_be16(val), p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_t(__be32, cpu_to_be32(val), p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_t(__be64, cpu_to_be64(val), p); +} + +static inline u32 __get_unaligned_be24(const u8 *p) +{ + return p[0] << 16 | p[1] << 8 | p[2]; +} + +static inline u32 get_unaligned_be24(const void *p) +{ + return __get_unaligned_be24(p); +} + +static inline u32 __get_unaligned_le24(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16; +} + +static inline u32 get_unaligned_le24(const void *p) +{ + return __get_unaligned_le24(p); +} + +static inline void __put_unaligned_be24(const u32 val, u8 *p) +{ + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; +} + +static inline void put_unaligned_be24(const u32 val, void *p) +{ + __put_unaligned_be24(val, p); +} + +static inline void __put_unaligned_le24(const u32 val, u8 *p) +{ + *p++ = val & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; +} + +static inline void put_unaligned_le24(const u32 val, void *p) +{ + __put_unaligned_le24(val, p); +} + +static inline void __put_unaligned_be48(const u64 val, u8 *p) +{ + *p++ = (val >> 40) & 0xff; + *p++ = (val >> 32) & 0xff; + *p++ = (val >> 24) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = val & 0xff; +} + +static inline void put_unaligned_be48(const u64 val, void *p) +{ + __put_unaligned_be48(val, p); +} + +static inline u64 __get_unaligned_be48(const u8 *p) +{ + return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 | + p[3] << 16 | p[4] << 8 | p[5]; +} + +static inline u64 get_unaligned_be48(const void *p) +{ + return __get_unaligned_be48(p); +} + +#endif /* __LINUX_UNALIGNED_H */ diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 4d39e6e11a95..5e6b212a2aed 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -16,6 +16,8 @@ struct utf8data_table; ((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \ ((unsigned int)(REV))) +#define UTF8_LATEST UNICODE_AGE(12, 1, 0) + static inline u8 unicode_major(unsigned int age) { return (age >> UNICODE_MAJ_SHIFT) & 0xff; @@ -76,4 +78,6 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt, struct unicode_map *utf8_load(unsigned int version); void utf8_unload(struct unicode_map *um); +int utf8_parse_version(char *version); + #endif /* _LINUX_UNICODE_H */ diff --git a/include/linux/union_find.h b/include/linux/union_find.h new file mode 100644 index 000000000000..cfd49263c138 --- /dev/null +++ b/include/linux/union_find.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_UNION_FIND_H +#define __LINUX_UNION_FIND_H +/** + * union_find.h - union-find data structure implementation + * + * This header provides functions and structures to implement the union-find + * data structure. The union-find data structure is used to manage disjoint + * sets and supports efficient union and find operations. + * + * See Documentation/core-api/union_find.rst for documentation and samples. + */ + +struct uf_node { + struct uf_node *parent; + unsigned int rank; +}; + +/* This macro is used for static initialization of a union-find node. */ +#define UF_INIT_NODE(node) {.parent = &node, .rank = 0} + +/** + * uf_node_init - Initialize a union-find node + * @node: pointer to the union-find node to be initialized + * + * This function sets the parent of the node to itself and + * initializes its rank to 0. + */ +static inline void uf_node_init(struct uf_node *node) +{ + node->parent = node; + node->rank = 0; +} + +/* find the root of a node */ +struct uf_node *uf_find(struct uf_node *node); + +/* Merge two intersecting nodes */ +void uf_union(struct uf_node *node1, struct uf_node *node2); + +#endif /* __LINUX_UNION_FIND_H */ diff --git a/include/linux/unroll.h b/include/linux/unroll.h new file mode 100644 index 000000000000..d42fd6366373 --- /dev/null +++ b/include/linux/unroll.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2023 Google LLC. + */ + +#ifndef __UNROLL_H +#define __UNROLL_H + +#include <linux/args.h> + +#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) + +#define __UNROLL_0(MACRO, args...) +#define __UNROLL_1(MACRO, args...) __UNROLL_0(MACRO, args) MACRO(0, args) +#define __UNROLL_2(MACRO, args...) __UNROLL_1(MACRO, args) MACRO(1, args) +#define __UNROLL_3(MACRO, args...) __UNROLL_2(MACRO, args) MACRO(2, args) +#define __UNROLL_4(MACRO, args...) __UNROLL_3(MACRO, args) MACRO(3, args) +#define __UNROLL_5(MACRO, args...) __UNROLL_4(MACRO, args) MACRO(4, args) +#define __UNROLL_6(MACRO, args...) __UNROLL_5(MACRO, args) MACRO(5, args) +#define __UNROLL_7(MACRO, args...) __UNROLL_6(MACRO, args) MACRO(6, args) +#define __UNROLL_8(MACRO, args...) __UNROLL_7(MACRO, args) MACRO(7, args) +#define __UNROLL_9(MACRO, args...) __UNROLL_8(MACRO, args) MACRO(8, args) +#define __UNROLL_10(MACRO, args...) __UNROLL_9(MACRO, args) MACRO(9, args) +#define __UNROLL_11(MACRO, args...) __UNROLL_10(MACRO, args) MACRO(10, args) +#define __UNROLL_12(MACRO, args...) __UNROLL_11(MACRO, args) MACRO(11, args) +#define __UNROLL_13(MACRO, args...) __UNROLL_12(MACRO, args) MACRO(12, args) +#define __UNROLL_14(MACRO, args...) __UNROLL_13(MACRO, args) MACRO(13, args) +#define __UNROLL_15(MACRO, args...) __UNROLL_14(MACRO, args) MACRO(14, args) +#define __UNROLL_16(MACRO, args...) __UNROLL_15(MACRO, args) MACRO(15, args) +#define __UNROLL_17(MACRO, args...) __UNROLL_16(MACRO, args) MACRO(16, args) +#define __UNROLL_18(MACRO, args...) __UNROLL_17(MACRO, args) MACRO(17, args) +#define __UNROLL_19(MACRO, args...) __UNROLL_18(MACRO, args) MACRO(18, args) +#define __UNROLL_20(MACRO, args...) __UNROLL_19(MACRO, args) MACRO(19, args) + +#endif /* __UNROLL_H */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index b503fafb7fb3..e0a4c2082245 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -15,34 +15,47 @@ #include <linux/rbtree.h> #include <linux/types.h> #include <linux/wait.h> +#include <linux/timer.h> +struct uprobe; struct vm_area_struct; struct mm_struct; struct inode; struct notifier_block; struct page; +/* + * Allowed return values from uprobe consumer's handler callback + * with following meaning: + * + * UPROBE_HANDLER_REMOVE + * - Remove the uprobe breakpoint from current->mm. + * UPROBE_HANDLER_IGNORE + * - Ignore ret_handler callback for this consumer. + */ #define UPROBE_HANDLER_REMOVE 1 -#define UPROBE_HANDLER_MASK 1 +#define UPROBE_HANDLER_IGNORE 2 #define MAX_URETPROBE_DEPTH 64 -enum uprobe_filter_ctx { - UPROBE_FILTER_REGISTER, - UPROBE_FILTER_UNREGISTER, - UPROBE_FILTER_MMAP, -}; - struct uprobe_consumer { - int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); + /* + * handler() can return UPROBE_HANDLER_REMOVE to signal the need to + * unregister uprobe for current process. If UPROBE_HANDLER_REMOVE is + * returned, filter() callback has to be implemented as well and it + * should return false to "confirm" the decision to uninstall uprobe + * for the current process. If filter() is omitted or returns true, + * UPROBE_HANDLER_REMOVE is effectively ignored. + */ + int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data); int (*ret_handler)(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs); - bool (*filter)(struct uprobe_consumer *self, - enum uprobe_filter_ctx ctx, - struct mm_struct *mm); + struct pt_regs *regs, __u64 *data); + bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); - struct uprobe_consumer *next; + struct list_head cons_node; + + __u64 id; /* set when uprobe_consumer is registered */ }; #ifdef CONFIG_UPROBES @@ -55,12 +68,62 @@ enum uprobe_task_state { UTASK_SSTEP_TRAPPED, }; +/* The state of hybrid-lifetime uprobe inside struct return_instance */ +enum hprobe_state { + HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */ + HPROBE_STABLE, /* refcounted uprobe */ + HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */ + HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */ +}; + +/* + * Hybrid lifetime uprobe. Represents a uprobe instance that could be either + * SRCU protected (with SRCU protection eventually potentially timing out), + * refcounted using uprobe->ref, or there could be no valid uprobe (NULL). + * + * hprobe's internal state is setup such that background timer thread can + * atomically "downgrade" temporarily RCU-protected uprobe into refcounted one + * (or no uprobe, if refcounting failed). + * + * *stable* pointer always point to the uprobe (or could be NULL if there is + * was no valid underlying uprobe to begin with). + * + * *leased* pointer is the key to achieving race-free atomic lifetime state + * transition and can have three possible states: + * - either the same non-NULL value as *stable*, in which case uprobe is + * SRCU-protected; + * - NULL, in which case uprobe (if there is any) is refcounted; + * - special __UPROBE_DEAD value, which represents an uprobe that was SRCU + * protected initially, but SRCU period timed out and we attempted to + * convert it to refcounted, but refcount_inc_not_zero() failed, because + * uprobe effectively went away (the last consumer unsubscribed). In this + * case it's important to know that *stable* pointer (which still has + * non-NULL uprobe pointer) shouldn't be used, because lifetime of + * underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an + * internal marker and is handled transparently by hprobe_fetch() helper. + * + * When uprobe is SRCU-protected, we also record srcu_idx value, necessary for + * SRCU unlocking. + * + * See hprobe_expire() and hprobe_fetch() for details of race-free uprobe + * state transitioning details. It all hinges on atomic xchg() over *leaded* + * pointer. *stable* pointer, once initially set, is not modified concurrently. + */ +struct hprobe { + enum hprobe_state state; + int srcu_idx; + struct uprobe *uprobe; +}; + /* * uprobe_task: Metadata of a task while it singlesteps. */ struct uprobe_task { enum uprobe_task_state state; + unsigned int depth; + struct return_instance *return_instances; + union { struct { struct arch_uprobe_task autask; @@ -74,21 +137,30 @@ struct uprobe_task { }; struct uprobe *active_uprobe; + struct timer_list ri_timer; unsigned long xol_vaddr; - struct return_instance *return_instances; - unsigned int depth; + struct arch_uprobe *auprobe; +}; + +struct return_consumer { + __u64 cookie; + __u64 id; }; struct return_instance { - struct uprobe *uprobe; + struct hprobe hprobe; unsigned long func; unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ + int consumers_cnt; struct return_instance *next; /* keep as stack */ -}; + struct rcu_head rcu; + + struct return_consumer consumers[] __counted_by(consumers_cnt); +} ____cacheline_aligned; enum rp_check { RP_CHECK_CALL, @@ -110,10 +182,10 @@ extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); -extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); -extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); -extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); -extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); +extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); +extern void uprobe_unregister_sync(void); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void uprobe_start_dup_mmap(void); @@ -151,22 +223,21 @@ static inline void uprobes_init(void) #define uprobe_get_trap_addr(regs) instruction_pointer(regs) -static inline int -uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) -{ - return -ENOSYS; -} -static inline int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) +static inline struct uprobe * +uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc) { - return -ENOSYS; + return ERR_PTR(-ENOSYS); } static inline int -uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +uprobe_apply(struct uprobe* uprobe, struct uprobe_consumer *uc, bool add) { return -ENOSYS; } static inline void -uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) +uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc) +{ +} +static inline void uprobe_unregister_sync(void) { } static inline int uprobe_mmap(struct vm_area_struct *vma) diff --git a/include/linux/usb.h b/include/linux/usb.h index 832997a9da0a..cfa8005e24f9 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -495,6 +495,12 @@ struct usb_dev_state; struct usb_tt; +enum usb_link_tunnel_mode { + USB_LINK_UNKNOWN = 0, + USB_LINK_NATIVE, + USB_LINK_TUNNELED, +}; + enum usb_port_connect_type { USB_PORT_CONNECT_TYPE_UNKNOWN = 0, USB_PORT_CONNECT_TYPE_HOT_PLUG, @@ -605,6 +611,7 @@ struct usb3_lpm_parameters { * WUSB devices are not, until we authorize them from user space. * FIXME -- complete doc * @authenticated: Crypto authentication passed + * @tunnel_mode: Connection native or tunneled over USB4 * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -714,6 +721,7 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; + enum usb_link_tunnel_mode tunnel_mode; int slot_id; struct usb2_lpm_parameters l1_params; @@ -1121,8 +1129,8 @@ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size) /* ----------------------------------------------------------------------- */ /* Stuff for dynamic usb ids */ +extern struct mutex usb_dynids_lock; struct usb_dynids { - spinlock_t lock; struct list_head list; }; @@ -1235,7 +1243,7 @@ struct usb_driver { unsigned int disable_hub_initiated_lpm:1; unsigned int soft_unbind:1; }; -#define to_usb_driver(d) container_of(d, struct usb_driver, driver) +#define to_usb_driver(d) container_of_const(d, struct usb_driver, driver) /** * struct usb_device_driver - identifies USB device driver to usbcore @@ -1286,8 +1294,7 @@ struct usb_device_driver { unsigned int supports_autosuspend:1; unsigned int generic_subclass:1; }; -#define to_usb_device_driver(d) container_of(d, struct usb_device_driver, \ - driver) +#define to_usb_device_driver(d) container_of_const(d, struct usb_device_driver, driver) /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index 5a7f96684ea2..ebdfef124b2b 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -65,6 +65,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_PHY_VBUS_CONTROL BIT(16) #define CI_HDRC_HAS_PORTSC_PEC_MISSED BIT(17) #define CI_HDRC_FORCE_VBUS_ACTIVE_ALWAYS BIT(18) +#define CI_HDRC_HAS_SHORT_PKT_LIMIT BIT(19) enum usb_dr_mode dr_mode; #define CI_HDRC_CONTROLLER_RESET_EVENT 0 #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index af3cd2aae4bc..6e38fb9d2117 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -256,7 +256,7 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f, struct usb_ep *_ep); int usb_func_wakeup(struct usb_function *func); -#define MAX_CONFIG_INTERFACES 16 /* arbitrary; max 255 */ +#define MAX_CONFIG_INTERFACES 32 /** * struct usb_configuration - represents one gadget configuration diff --git a/include/linux/usb/func_utils.h b/include/linux/usb/func_utils.h new file mode 100644 index 000000000000..c8795c965109 --- /dev/null +++ b/include/linux/usb/func_utils.h @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * func_utils.h + * + * Utility definitions for USB functions + * + * Copyright (c) 2013 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com> + */ + +#ifndef _FUNC_UTILS_H_ +#define _FUNC_UTILS_H_ + +#include <linux/usb/gadget.h> +#include <linux/overflow.h> + +/* Variable Length Array Macros **********************************************/ +#define vla_group(groupname) size_t groupname##__next = 0 +#define vla_group_size(groupname) groupname##__next + +#define vla_item(groupname, type, name, n) \ + size_t groupname##_##name##__offset = ({ \ + size_t offset = 0; \ + if (groupname##__next != SIZE_MAX) { \ + size_t align_mask = __alignof__(type) - 1; \ + size_t size = array_size(n, sizeof(type)); \ + offset = (groupname##__next + align_mask) & \ + ~align_mask; \ + if (check_add_overflow(offset, size, \ + &groupname##__next)) { \ + groupname##__next = SIZE_MAX; \ + offset = 0; \ + } \ + } \ + offset; \ + }) + +#define vla_item_with_sz(groupname, type, name, n) \ + size_t groupname##_##name##__sz = array_size(n, sizeof(type)); \ + size_t groupname##_##name##__offset = ({ \ + size_t offset = 0; \ + if (groupname##__next != SIZE_MAX) { \ + size_t align_mask = __alignof__(type) - 1; \ + offset = (groupname##__next + align_mask) & \ + ~align_mask; \ + if (check_add_overflow(offset, groupname##_##name##__sz,\ + &groupname##__next)) { \ + groupname##__next = SIZE_MAX; \ + offset = 0; \ + } \ + } \ + offset; \ + }) + +#define vla_ptr(ptr, groupname, name) \ + ((void *) ((char *)ptr + groupname##_##name##__offset)) + +struct usb_ep; +struct usb_request; + +/** + * alloc_ep_req - returns a usb_request allocated by the gadget driver and + * allocates the request's buffer. + * + * @ep: the endpoint to allocate a usb_request + * @len: usb_requests's buffer suggested size + * + * In case @ep direction is OUT, the @len will be aligned to ep's + * wMaxPacketSize. In order to avoid memory leaks or drops, *always* use + * usb_requests's length (req->length) to refer to the allocated buffer size. + * Requests allocated via alloc_ep_req() *must* be freed by free_ep_req(). + */ +struct usb_request *alloc_ep_req(struct usb_ep *ep, size_t len); + +/* Frees a usb_request previously allocated by alloc_ep_req() */ +static inline void free_ep_req(struct usb_ep *ep, struct usb_request *req) +{ + WARN_ON(req->buf == NULL); + kfree(req->buf); + req->buf = NULL; + usb_ep_free_request(ep, req); +} + +#endif /* _FUNC_UTILS_H_ */ diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index d61aebd68128..6b5d6838f865 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -4,9 +4,6 @@ #include <linux/configfs.h> -int check_user_usb_string(const char *name, - struct usb_gadget_strings *stringtab_dev); - #define GS_STRINGS_W(__struct, __name) \ static ssize_t __struct##_##__name##_store(struct config_item *item, \ const char *page, size_t len) \ @@ -37,7 +34,7 @@ static struct configfs_item_operations struct_in##_langid_item_ops = { \ .release = struct_in##_attr_release, \ }; \ \ -static struct config_item_type struct_in##_langid_type = { \ +static const struct config_item_type struct_in##_langid_type = { \ .ct_item_ops = &struct_in##_langid_item_ops, \ .ct_attrs = struct_in##_langid_attrs, \ .ct_owner = THIS_MODULE, \ @@ -94,7 +91,7 @@ static struct configfs_group_operations struct_in##_strings_ops = { \ .drop_item = &struct_in##_strings_drop, \ }; \ \ -static struct config_item_type struct_in##_strings_type = { \ +static const struct config_item_type struct_in##_strings_type = { \ .ct_group_ops = &struct_in##_strings_ops, \ .ct_owner = THIS_MODULE, \ } diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 1a0a4dc87980..75b2b763f1ba 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -311,8 +311,11 @@ struct usb_serial_driver { #define to_usb_serial_driver(d) \ container_of(d, struct usb_serial_driver, driver) -int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], - const char *name, const struct usb_device_id *id_table); +#define usb_serial_register_drivers(serial_drivers, name, id_table) \ + __usb_serial_register_drivers(serial_drivers, THIS_MODULE, name, id_table) +int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[], + struct module *owner, const char *name, + const struct usb_device_id *id_table); void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]); void usb_serial_port_softint(struct usb_serial_port *port); diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index 2827ce72e502..8539956bc2be 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -53,7 +53,7 @@ struct bulk_cb_wrap { __le32 Signature; /* contains 'USBC' */ __u32 Tag; /* unique per command id */ __le32 DataTransferLength; /* size of data */ - __u8 Flags; /* direction in bit 0 */ + __u8 Flags; /* direction in bit 7 */ __u8 Lun; /* LUN normally 0 */ __u8 Length; /* length of the CDB */ __u8 CDB[16]; /* max command */ diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 0ab39b6ea205..f7f5cfbdef12 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -63,15 +63,12 @@ #define TCPC_ROLE_CTRL 0x1a #define TCPC_ROLE_CTRL_DRP BIT(6) -#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4 -#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3 +#define TCPC_ROLE_CTRL_RP_VAL GENMASK(5, 4) #define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0 #define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1 #define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2 -#define TCPC_ROLE_CTRL_CC2_SHIFT 2 -#define TCPC_ROLE_CTRL_CC2_MASK 0x3 -#define TCPC_ROLE_CTRL_CC1_SHIFT 0 -#define TCPC_ROLE_CTRL_CC1_MASK 0x3 +#define TCPC_ROLE_CTRL_CC2 GENMASK(3, 2) +#define TCPC_ROLE_CTRL_CC1 GENMASK(1, 0) #define TCPC_ROLE_CTRL_CC_RA 0x0 #define TCPC_ROLE_CTRL_CC_RP 0x1 #define TCPC_ROLE_CTRL_CC_RD 0x2 @@ -92,11 +89,9 @@ #define TCPC_CC_STATUS_TERM BIT(4) #define TCPC_CC_STATUS_TERM_RP 0 #define TCPC_CC_STATUS_TERM_RD 1 +#define TCPC_CC_STATUS_CC2 GENMASK(3, 2) +#define TCPC_CC_STATUS_CC1 GENMASK(1, 0) #define TCPC_CC_STATE_SRC_OPEN 0 -#define TCPC_CC_STATUS_CC2_SHIFT 2 -#define TCPC_CC_STATUS_CC2_MASK 0x3 -#define TCPC_CC_STATUS_CC1_SHIFT 0 -#define TCPC_CC_STATUS_CC1_MASK 0x3 #define TCPC_POWER_STATUS 0x1e #define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7) @@ -134,9 +129,8 @@ #define TCPC_MSG_HDR_INFO 0x2e #define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3) +#define TCPC_MSG_HDR_INFO_REV GENMASK(2, 1) #define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0) -#define TCPC_MSG_HDR_INFO_REV_SHIFT 1 -#define TCPC_MSG_HDR_INFO_REV_MASK 0x3 #define TCPC_RX_DETECT 0x2f #define TCPC_RX_DETECT_HARD_RESET BIT(5) @@ -154,10 +148,8 @@ #define TCPC_RX_DATA 0x34 /* through 0x4f */ #define TCPC_TRANSMIT 0x50 -#define TCPC_TRANSMIT_RETRY_SHIFT 4 -#define TCPC_TRANSMIT_RETRY_MASK 0x3 -#define TCPC_TRANSMIT_TYPE_SHIFT 0 -#define TCPC_TRANSMIT_TYPE_MASK 0x7 +#define TCPC_TRANSMIT_RETRY GENMASK(5, 4) +#define TCPC_TRANSMIT_TYPE GENMASK(2, 0) #define TCPC_TX_BYTE_CNT 0x51 #define TCPC_TX_HDR 0x52 @@ -178,8 +170,7 @@ #define tcpc_presenting_rd(reg, cc) \ (!(TCPC_ROLE_CTRL_DRP & (reg)) && \ - (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \ - (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT))) + FIELD_GET(TCPC_ROLE_CTRL_## cc, reg) == TCPC_ROLE_CTRL_CC_RD) struct tcpci; @@ -190,7 +181,7 @@ struct tcpci; * Optional; Callback to perform chip specific operations when FRS * is sourcing vbus. * @auto_discharge_disconnect: - * Optional; Enables TCPC to autonously discharge vbus on disconnect. + * Optional; Enables TCPC to autonomously discharge vbus on disconnect. * @vbus_vsafe0v: * optional; Set when TCPC can detect whether vbus is at VSAFE0V. * @set_partner_usb_comm_capable: @@ -256,7 +247,7 @@ static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink) if (sink) return TYPEC_CC_RP_3_0; fallthrough; - case 0x0: + case TCPC_CC_STATE_SRC_OPEN: default: return TYPEC_CC_OPEN; } diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index 549275f8ac1b..d616b8807000 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -87,6 +87,17 @@ enum typec_orientation { TYPEC_ORIENTATION_REVERSE, }; +enum usb_mode { + USB_MODE_NONE, + USB_MODE_USB2, + USB_MODE_USB3, + USB_MODE_USB4 +}; + +#define USB_CAPABILITY_USB2 BIT(0) +#define USB_CAPABILITY_USB3 BIT(1) +#define USB_CAPABILITY_USB4 BIT(2) + /* * struct enter_usb_data - Enter_USB Message details * @eudo: Enter_USB Data Object @@ -209,6 +220,7 @@ struct typec_cable_desc { * @accessory: Audio, Debug or none. * @identity: Discover Identity command data * @pd_revision: USB Power Delivery Specification Revision if supported + * @usb_capability: Supported USB Modes * @attach: Notification about attached USB device * @deattach: Notification about removed USB device * @@ -226,6 +238,7 @@ struct typec_partner_desc { enum typec_accessory accessory; struct usb_pd_identity *identity; u16 pd_revision; /* 0300H = "3.0" */ + u8 usb_capability; void (*attach)(struct typec_partner *partner, struct device *dev); void (*deattach)(struct typec_partner *partner, struct device *dev); @@ -240,6 +253,8 @@ struct typec_partner_desc { * @port_type_set: Set port type * @pd_get: Get available USB Power Delivery Capabilities. * @pd_set: Set USB Power Delivery Capabilities. + * @default_usb_mode_set: USB Mode to be used by default with Enter_USB Message + * @enter_usb_mode: Change the active USB Mode */ struct typec_operations { int (*try_role)(struct typec_port *port, int role); @@ -250,6 +265,8 @@ struct typec_operations { enum typec_port_type type); struct usb_power_delivery **(*pd_get)(struct typec_port *port); int (*pd_set)(struct typec_port *port, struct usb_power_delivery *pd); + int (*default_usb_mode_set)(struct typec_port *port, enum usb_mode mode); + int (*enter_usb_mode)(struct typec_port *port, enum usb_mode mode); }; enum usb_pd_svdm_ver { @@ -267,6 +284,7 @@ enum usb_pd_svdm_ver { * @svdm_version: USB PD Structured VDM version if supported * @prefer_role: Initial role preference (DRP ports). * @accessory: Supported Accessory Modes + * @usb_capability: Supported USB Modes * @fwnode: Optional fwnode of the port * @driver_data: Private pointer for driver specific info * @pd: Optional USB Power Delivery Support @@ -283,6 +301,7 @@ struct typec_capability { int prefer_role; enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; unsigned int orientation_aware:1; + u8 usb_capability; struct fwnode_handle *fwnode; void *driver_data; @@ -350,6 +369,9 @@ int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_ int typec_partner_set_usb_power_delivery(struct typec_partner *partner, struct usb_power_delivery *pd); +void typec_partner_set_usb_mode(struct typec_partner *partner, enum usb_mode usb_mode); +void typec_port_set_usb_mode(struct typec_port *port, enum usb_mode mode); + /** * struct typec_connector - Representation of Type-C port for external drivers * @attach: notification about device removal diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 9f08a584d707..0b9f1e598e3a 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,8 +76,23 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +/* This one is special, as it indicates that the device is going away + * there are cyclic dependencies between tasklet, timer and bh + * that must be broken + */ +# define EVENT_UNPLUG 31 }; +static inline bool usbnet_going_away(struct usbnet *ubn) +{ + return test_bit(EVENT_UNPLUG, &ubn->flags); +} + +static inline void usbnet_mark_going_away(struct usbnet *ubn) +{ + set_bit(EVENT_UNPLUG, &ubn->flags); +} + static inline struct usb_driver *driver_of(struct usb_interface *intf) { return to_usb_driver(intf->dev.driver); diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 88d96095bcb1..bce95153e5a6 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -118,6 +118,9 @@ #define UVC_GUID_FORMAT_Y12I \ { 'Y', '1', '2', 'I', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y16I \ + { 'Y', '1', '6', 'I', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} #define UVC_GUID_FORMAT_Z16 \ { 'Z', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} @@ -140,6 +143,9 @@ #define UVC_GUID_FORMAT_D3DFMT_L8 \ {0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_D3DFMT_R5G6B5 \ + {0x7b, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} #define UVC_GUID_FORMAT_KSMEDIA_L8_IR \ {0x32, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6030a8235617..7183e5aca282 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -21,9 +21,11 @@ struct uid_gid_extent { }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ - u32 nr_extents; union { - struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; + struct { + struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; + u32 nr_extents; + }; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; @@ -139,7 +141,8 @@ static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type ty long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v); -long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type); +long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, + bool override_rlimit); void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type); bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max); diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index a12bcf042551..cb40f1a1d081 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); extern void dup_userfaultfd_complete(struct list_head *); +void dup_userfaultfd_fail(struct list_head *); extern void mremap_userfaultfd_prep(struct vm_area_struct *, struct vm_userfaultfd_ctx *); @@ -267,6 +268,25 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm, extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); +void userfaultfd_reset_ctx(struct vm_area_struct *vma); + +struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, + struct vm_area_struct *prev, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end); + +int userfaultfd_register_range(struct userfaultfd_ctx *ctx, + struct vm_area_struct *vma, + unsigned long vm_flags, + unsigned long start, unsigned long end, + bool wp_async); + +void userfaultfd_release_new(struct userfaultfd_ctx *ctx); + +void userfaultfd_release_all(struct mm_struct *mm, + struct userfaultfd_ctx *ctx); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -332,6 +352,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l) { } +static inline void dup_userfaultfd_fail(struct list_head *l) +{ +} + static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct vm_userfaultfd_ctx *ctx) { diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 6bb460c3e818..825487fb66fa 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -4,19 +4,6 @@ #include <linux/math.h> -#define __find_closest(x, a, as, op) \ -({ \ - typeof(as) __fc_i, __fc_as = (as) - 1; \ - typeof(x) __fc_x = (x); \ - typeof(*a) const *__fc_a = (a); \ - for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ - if (__fc_x op DIV_ROUND_CLOSEST(__fc_a[__fc_i] + \ - __fc_a[__fc_i + 1], 2)) \ - break; \ - } \ - (__fc_i); \ -}) - /** * find_closest - locate the closest element in a sorted array * @x: The reference value. @@ -25,8 +12,27 @@ * @as: Size of 'a'. * * Returns the index of the element closest to 'x'. + * Note: If using an array of negative numbers (or mixed positive numbers), + * then be sure that 'x' is of a signed-type to get good results. */ -#define find_closest(x, a, as) __find_closest(x, a, as, <=) +#define find_closest(x, a, as) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + long __fc_mid_x, __fc_x = (x); \ + long __fc_left, __fc_right; \ + typeof(*a) const *__fc_a = (a); \ + for (__fc_i = 0; __fc_i < __fc_as; __fc_i++) { \ + __fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i + 1]) / 2; \ + if (__fc_x <= __fc_mid_x) { \ + __fc_left = __fc_x - __fc_a[__fc_i]; \ + __fc_right = __fc_a[__fc_i + 1] - __fc_x; \ + if (__fc_right < __fc_left) \ + __fc_i++; \ + break; \ + } \ + } \ + (__fc_i); \ +}) /** * find_closest_descending - locate the closest element in a sorted array @@ -36,9 +42,27 @@ * @as: Size of 'a'. * * Similar to find_closest() but 'a' is expected to be sorted in descending - * order. + * order. The iteration is done in reverse order, so that the comparison + * of '__fc_right' & '__fc_left' also works for unsigned numbers. */ -#define find_closest_descending(x, a, as) __find_closest(x, a, as, >=) +#define find_closest_descending(x, a, as) \ +({ \ + typeof(as) __fc_i, __fc_as = (as) - 1; \ + long __fc_mid_x, __fc_x = (x); \ + long __fc_left, __fc_right; \ + typeof(*a) const *__fc_a = (a); \ + for (__fc_i = __fc_as; __fc_i >= 1; __fc_i--) { \ + __fc_mid_x = (__fc_a[__fc_i] + __fc_a[__fc_i - 1]) / 2; \ + if (__fc_x <= __fc_mid_x) { \ + __fc_left = __fc_x - __fc_a[__fc_i]; \ + __fc_right = __fc_a[__fc_i - 1] - __fc_x; \ + if (__fc_right < __fc_left) \ + __fc_i--; \ + break; \ + } \ + } \ + (__fc_i); \ +}) /** * is_insidevar - check if the @ptr points inside the @var memory range. diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 7977ca03ac7a..2e7a30fe6b92 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -582,11 +582,20 @@ void vdpa_set_status(struct vdpa_device *vdev, u8 status); * @dev: vdpa device to remove * Driver need to remove the specified device by calling * _vdpa_unregister_device(). + * @dev_set_attr: change a vdpa device's attr after it was create + * @mdev: parent device to use for device + * @dev: vdpa device structure + * @config:Attributes to be set for the device. + * The driver needs to check the mask of the structure and then set + * the related information to the vdpa device. The driver must return 0 + * if set successfully. */ struct vdpa_mgmtdev_ops { int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config); void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); + int (*dev_set_attr)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev, + const struct vdpa_dev_set_config *config); }; /** diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 4b16844c6bc2..dd88682e27e3 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -56,6 +56,17 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq, void *ctx, gfp_t gfp); +int virtqueue_add_inbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + void *ctx, + gfp_t gfp); + +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, + struct scatterlist *sg, unsigned int num, + void *data, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, @@ -82,8 +93,6 @@ bool virtqueue_enable_cb(struct virtqueue *vq); unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); -int virtqueue_set_dma_premapped(struct virtqueue *_vq); - bool virtqueue_poll(struct virtqueue *vq, unsigned); bool virtqueue_enable_cb_delayed(struct virtqueue *vq); @@ -100,9 +109,11 @@ dma_addr_t virtqueue_get_avail_addr(const struct virtqueue *vq); dma_addr_t virtqueue_get_used_addr(const struct virtqueue *vq); int virtqueue_resize(struct virtqueue *vq, u32 num, - void (*recycle)(struct virtqueue *vq, void *buf)); + void (*recycle)(struct virtqueue *vq, void *buf), + void (*recycle_done)(struct virtqueue *vq)); int virtqueue_reset(struct virtqueue *vq, - void (*recycle)(struct virtqueue *vq, void *buf)); + void (*recycle)(struct virtqueue *vq, void *buf), + void (*recycle_done)(struct virtqueue *vq)); struct virtio_admin_cmd { __le16 opcode; @@ -111,6 +122,7 @@ struct virtio_admin_cmd { struct scatterlist *data_sg; struct scatterlist *result_sg; struct completion completion; + u32 result_sg_size; int ret; }; @@ -118,7 +130,9 @@ struct virtio_admin_cmd { * struct virtio_device - representation of a device using virtio * @index: unique position on the virtio bus * @failed: saved value for VIRTIO_CONFIG_S_FAILED bit (for restore) - * @config_enabled: configuration change reporting enabled + * @config_core_enabled: configuration change reporting enabled by core + * @config_driver_disabled: configuration change reporting disabled by + * a driver * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting * @vqs_list_lock: protects @vqs. @@ -135,7 +149,8 @@ struct virtio_admin_cmd { struct virtio_device { int index; bool failed; - bool config_enabled; + bool config_core_enabled; + bool config_driver_disabled; bool config_change_pending; spinlock_t config_lock; spinlock_t vqs_list_lock; @@ -166,6 +181,10 @@ void __virtqueue_break(struct virtqueue *_vq); void __virtqueue_unbreak(struct virtqueue *_vq); void virtio_config_changed(struct virtio_device *dev); + +void virtio_config_driver_disable(struct virtio_device *dev); +void virtio_config_driver_enable(struct virtio_device *dev); + #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 6c395a2600e8..02a9f4dc594d 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -103,8 +103,10 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; + if (skb_transport_offset(skb) < nh_min_len) + return -EINVAL; - nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb)); + nh_min_len = skb_transport_offset(skb); p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; @@ -173,7 +175,8 @@ retry: break; case SKB_GSO_TCPV4: case SKB_GSO_TCPV6: - if (skb->csum_offset != offsetof(struct tcphdr, check)) + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb->csum_offset != offsetof(struct tcphdr, check)) return -EINVAL; break; } diff --git a/include/linux/virtio_pci_admin.h b/include/linux/virtio_pci_admin.h index f4a100a0fe2e..dffc92c17ad2 100644 --- a/include/linux/virtio_pci_admin.h +++ b/include/linux/virtio_pci_admin.h @@ -20,4 +20,15 @@ int virtio_pci_admin_legacy_io_notify_info(struct pci_dev *pdev, u64 *bar_offset); #endif +bool virtio_pci_admin_has_dev_parts(struct pci_dev *pdev); +int virtio_pci_admin_mode_set(struct pci_dev *pdev, u8 mode); +int virtio_pci_admin_obj_create(struct pci_dev *pdev, u16 obj_type, u8 operation_type, + u32 *obj_id); +int virtio_pci_admin_obj_destroy(struct pci_dev *pdev, u16 obj_type, u32 id); +int virtio_pci_admin_dev_parts_metadata_get(struct pci_dev *pdev, u16 obj_type, + u32 id, u8 metadata_type, u32 *out); +int virtio_pci_admin_dev_parts_get(struct pci_dev *pdev, u16 obj_type, u32 id, + u8 get_type, struct scatterlist *res_sg, u32 *res_size); +int virtio_pci_admin_dev_parts_set(struct pci_dev *pdev, struct scatterlist *data_sg); + #endif /* _LINUX_VIRTIO_PCI_ADMIN_H */ diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index c82089dee0c8..0387d64e2c66 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -133,6 +133,7 @@ struct virtio_vsock_sock { u32 tx_cnt; u32 peer_fwd_cnt; u32 peer_buf_alloc; + size_t bytes_unsent; /* Protected by rx_lock */ u32 fwd_cnt; @@ -193,6 +194,11 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk); +ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk); + +void virtio_transport_consume_skb_sent(struct sk_buff *skb, + bool consume); + int virtio_transport_do_socket_init(struct vsock_sock *vsk, struct vsock_sock *psk); int diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 747943bc8cc2..f70d0958095c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -50,6 +50,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_ANON, PGSTEAL_FILE, #ifdef CONFIG_NUMA + PGSCAN_ZONE_RECLAIM_SUCCESS, PGSCAN_ZONE_RECLAIM_FAILED, #endif PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, @@ -104,6 +105,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, THP_DEFERRED_SPLIT_PAGE, + THP_UNDERUSED_SPLIT_PAGE, THP_SPLIT_PMD, THP_SCAN_EXCEED_NONE_PTE, THP_SCAN_EXCEED_SWAP_PTE, @@ -132,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, + SWPIN_ZERO, + SWPOUT_ZERO, #ifdef CONFIG_KSM KSM_SWPIN_COPY, #endif @@ -154,6 +158,30 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, VMA_LOCK_RETRY, VMA_LOCK_MISS, #endif +#ifdef CONFIG_DEBUG_STACK_USAGE + KSTACK_1K, +#if THREAD_SIZE > 1024 + KSTACK_2K, +#endif +#if THREAD_SIZE > 2048 + KSTACK_4K, +#endif +#if THREAD_SIZE > 4096 + KSTACK_8K, +#endif +#if THREAD_SIZE > 8192 + KSTACK_16K, +#endif +#if THREAD_SIZE > 16384 + KSTACK_32K, +#endif +#if THREAD_SIZE > 32768 + KSTACK_64K, +#endif +#if THREAD_SIZE > 65536 + KSTACK_REST, +#endif +#endif /* CONFIG_DEBUG_STACK_USAGE */ NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index e4a631ec430b..31e9ffd936e3 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -134,12 +134,6 @@ extern void vm_unmap_ram(const void *mem, unsigned int count); extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void); -#ifdef CONFIG_MMU -extern unsigned long vmalloc_nr_pages(void); -#else -static inline unsigned long vmalloc_nr_pages(void) { return 0; } -#endif - extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); #define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) @@ -189,6 +183,10 @@ extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1 extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) +void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags) + __realloc_size(2); +#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__)) + extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); @@ -204,6 +202,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); +int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, + struct page **pages, unsigned int page_shift); + /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() @@ -262,12 +263,29 @@ static inline bool is_vm_area_hugepages(const void *addr) #endif } +/* for /proc/kcore */ +long vread_iter(struct iov_iter *iter, const char *addr, size_t count); + +/* + * Internals. Don't use.. + */ +__init void vm_area_add_early(struct vm_struct *vm); +__init void vm_area_register_early(struct vm_struct *vm, size_t align); + +int register_vmap_purge_notifier(struct notifier_block *nb); +int unregister_vmap_purge_notifier(struct notifier_block *nb); + #ifdef CONFIG_MMU +#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) + +unsigned long vmalloc_nr_pages(void); + int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); + static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); @@ -275,24 +293,14 @@ static inline void set_vm_flush_reset_perms(void *addr) if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } +#else /* !CONFIG_MMU */ +#define VMALLOC_TOTAL 0UL -#else -static inline void set_vm_flush_reset_perms(void *addr) -{ -} -#endif - -/* for /proc/kcore */ -extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); - -/* - * Internals. Don't use.. - */ -extern __init void vm_area_add_early(struct vm_struct *vm); -extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +static inline unsigned long vmalloc_nr_pages(void) { return 0; } +static inline void set_vm_flush_reset_perms(void *addr) {} +#endif /* CONFIG_MMU */ -#ifdef CONFIG_SMP -# ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_SMP) struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); @@ -307,22 +315,9 @@ pcpu_get_vm_areas(const unsigned long *offsets, return NULL; } -static inline void -pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) -{ -} -# endif +static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {} #endif -#ifdef CONFIG_MMU -#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -#else -#define VMALLOC_TOTAL 0UL -#endif - -int register_vmap_purge_notifier(struct notifier_block *nb); -int unregister_vmap_purge_notifier(struct notifier_block *nb); - #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9eb77c9007e6..d2761bf8ff32 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -32,6 +32,7 @@ struct reclaim_stat { unsigned nr_ref_keep; unsigned nr_unmap_fail; unsigned nr_lazyfree_fail; + unsigned nr_demoted; }; /* Stat data for system wide items */ diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h index 919d999a8c1d..b6eeb8cb6070 100644 --- a/include/linux/vt_buffer.h +++ b/include/linux/vt_buffer.h @@ -28,45 +28,21 @@ #ifndef VT_BUF_HAVE_MEMSETW static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - count /= 2; - while (count--) - scr_writew(c, s++); -#else memset16(s, c, count / 2); -#endif } #endif #ifndef VT_BUF_HAVE_MEMCPYW static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - count /= 2; - while (count--) - scr_writew(scr_readw(s++), d++); -#else memcpy(d, s, count); -#endif } #endif #ifndef VT_BUF_HAVE_MEMMOVEW static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) { -#ifdef VT_BUF_HAVE_RW - if (d < s) - scr_memcpyw(d, s, count); - else { - count /= 2; - d += count; - s += count; - while (count--) - scr_writew(scr_readw(--s), --d); - } -#else memmove(d, s, count); -#endif } #endif diff --git a/include/linux/wait.h b/include/linux/wait.h index 8aa3372f21a0..6d90ad974408 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) #define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1) #define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0) +#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL) #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) @@ -541,8 +542,8 @@ do { \ int __ret = 0; \ struct hrtimer_sleeper __t; \ \ - hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ - HRTIMER_MODE_REL); \ + hrtimer_setup_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ if ((timeout) != KTIME_MAX) { \ hrtimer_set_expires_range_ns(&__t.timer, timeout, \ current->timer_slack_ns); \ diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index 7725b7579b78..9e29d79fc790 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -8,7 +8,7 @@ #include <linux/wait.h> struct wait_bit_key { - void *flags; + unsigned long *flags; int bit_nr; unsigned long timeout; }; @@ -23,14 +23,14 @@ struct wait_bit_queue_entry { typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); -void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); +void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); -void wake_up_bit(void *word, int bit); -int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); -int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -struct wait_queue_head *bit_waitqueue(void *word, int bit); +void wake_up_bit(unsigned long *word, int bit); +int out_of_line_wait_on_bit(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode); +int out_of_line_wait_on_bit_timeout(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); +int out_of_line_wait_on_bit_lock(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode); +struct wait_queue_head *bit_waitqueue(unsigned long *word, int bit); extern void __init wait_bit_init(void); int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key); @@ -49,23 +49,24 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync extern int bit_wait(struct wait_bit_key *key, int mode); extern int bit_wait_io(struct wait_bit_key *key, int mode); extern int bit_wait_timeout(struct wait_bit_key *key, int mode); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); /** * wait_on_bit - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit. - * For instance, if one were to have waiters on a bitflag, one would - * call wait_on_bit() in threads waiting for the bit to clear. - * One uses wait_on_bit() where one is waiting for the bit to clear, - * but has no intention of setting it. - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the target bit, even + * if other processes on the same queue are waiting for other bits. + * + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal. */ static inline int wait_on_bit(unsigned long *word, int bit, unsigned mode) @@ -80,17 +81,20 @@ wait_on_bit(unsigned long *word, int bit, unsigned mode) /** * wait_on_bit_io - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), but calls - * io_schedule() instead of schedule() for the actual waiting. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * This is similar to wait_on_bit(), but calls io_schedule() instead of + * schedule() for the actual waiting. + * + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal. */ static inline int wait_on_bit_io(unsigned long *word, int bit, unsigned mode) @@ -104,19 +108,24 @@ wait_on_bit_io(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout to elapse + * @word: the address containing the bit being waited on + * @bit: the bit at that address being waited on * @mode: the task state to sleep in * @timeout: timeout, in jiffies * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared. This is similar to wait_on_bit(), except also takes a - * timeout parameter. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared, or for a timeout to expire. The + * clearing of the bit must be signalled with wake_up_bit(), often as + * clear_and_wake_up_bit(). + * + * This is similar to wait_on_bit(), except it also takes a timeout + * parameter. * - * Returned value will be zero if the bit was cleared before the - * @timeout elapsed, or non-zero if the @timeout elapsed or process - * received a signal and the mode permitted wakeup on that signal. + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or %-EINTR if the process received a + * signal and the mode permitted wake up on that signal, or %-EAGAIN if the + * timeout elapsed. */ static inline int wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, @@ -132,19 +141,21 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, /** * wait_on_bit_action - wait for a bit to be cleared - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * @word: the address containing the bit waited on + * @bit: the bit at that address being waited on * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared, and allow the waiting action to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. + * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP()) + * to be cleared. The clearing of the bit must be signalled with + * wake_up_bit(), often as clear_and_wake_up_bit(). + * + * This is similar to wait_on_bit(), but calls @action() instead of + * schedule() for the actual waiting. * - * Returned value will be zero if the bit was cleared, or non-zero - * if the process received a signal and the mode permitted wakeup - * on that signal. + * Returned value will be zero if the bit was cleared in which case the + * call has ACQUIRE semantics, or the error code returned by @action if + * that call returned non-zero. */ static inline int wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, @@ -157,23 +168,22 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, } /** - * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @mode: the task state to sleep in * - * There is a standard hashed waitqueue table for generic use. This - * is the part of the hashtable's accessor API that waits on a bit - * when one intends to set it, for instance, trying to lock bitflags. - * For instance, if one were to have waiters trying to set bitflag - * and waiting for it to clear before setting it, one would call - * wait_on_bit() in threads waiting to be able to set the bit. - * One uses wait_on_bit_lock() where one is waiting for the bit to - * clear with the intention of setting it, and when done, clearing it. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be + * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As + * soon as it is clear, atomically set it and return. * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. + * This is similar to wait_on_bit(), but sets the bit before returning. + * + * Returned value will be zero if the bit was successfully set in which + * case the call has the same memory sequencing semantics as + * test_and_clear_bit(), or %-EINTR if the process received a signal and + * the mode permitted wake up on that signal. */ static inline int wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) @@ -185,15 +195,18 @@ wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock_io - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to atomically set it. This is similar - * to wait_on_bit(), but calls io_schedule() instead of schedule() - * for the actual waiting. + * Wait for the given bit in an unsigned long or bitmap (see + * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be + * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As + * soon as it is clear, atomically set it and return. + * + * This is similar to wait_on_bit_lock(), but calls io_schedule() instead + * of schedule(). * * Returns zero if the bit was (eventually) found to be clear and was * set. Returns non-zero if a signal was delivered to the process and @@ -209,21 +222,19 @@ wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) } /** - * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it - * @word: the word being waited on, a kernel virtual address - * @bit: the bit of the word being waited on + * wait_on_bit_lock_action - wait for a bit to be cleared, then set it + * @word: the address containing the bit being waited on + * @bit: the bit of the word being waited on and set * @action: the function used to sleep, which may take special actions * @mode: the task state to sleep in * - * Use the standard hashed waitqueue table to wait for a bit - * to be cleared and then to set it, and allow the waiting action - * to be specified. - * This is like wait_on_bit() but allows fine control of how the waiting - * is done. + * This is similar to wait_on_bit_lock(), but calls @action() instead of + * schedule() for the actual waiting. * - * Returns zero if the bit was (eventually) found to be clear and was - * set. Returns non-zero if a signal was delivered to the process and - * the @mode allows that signal to wake the process. + * Returned value will be zero if the bit was successfully set in which + * case the call has the same memory sequencing semantics as + * test_and_clear_bit(), or the error code returned by @action if that + * call returned non-zero. */ static inline int wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, @@ -269,7 +280,26 @@ __out: __ret; \ #define __wait_var_event(var, condition) \ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ schedule()) +#define __wait_var_event_io(var, condition) \ + ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + io_schedule()) +/** + * wait_var_event - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true, only re-checking when a wake up is + * received for the given @var (an arbitrary kernel address which need + * not be directly related to the given condition, but usually is). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the given address. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event(var, condition) \ do { \ might_sleep(); \ @@ -278,10 +308,56 @@ do { \ __wait_var_event(var, condition); \ } while (0) +/** + * wait_var_event_io - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for an IO related @condition to be true, only re-checking when a + * wake up is received for the given @var (an arbitrary kernel address + * which need not be directly related to the given condition, but + * usually is). + * + * The process will wait on a waitqueue selected by hash from a shared + * pool. It will only be woken on a wake_up for the given address. + * + * This is similar to wait_var_event(), but calls io_schedule() instead + * of schedule(). + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ +#define wait_var_event_io(var, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_var_event_io(var, condition); \ +} while (0) + #define __wait_var_event_killable(var, condition) \ ___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \ schedule()) +/** + * wait_var_event_killable - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true or a fatal signal to be received, + * only re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is + * 0 if the condition became true, or %-ERESTARTSYS if a fatal signal + * was received. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_killable(var, condition) \ ({ \ int __ret = 0; \ @@ -296,6 +372,26 @@ do { \ TASK_UNINTERRUPTIBLE, 0, timeout, \ __ret = schedule_timeout(__ret)) +/** + * wait_var_event_timeout - wait for a variable to be updated or a timeout to expire + * @var: the address of variable being waited on + * @condition: the condition to wait for + * @timeout: maximum time to wait in jiffies + * + * Wait for a @condition to be true or a timeout to expire, only + * re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is 0 if + * the timeout expired and the condition was still false, or the + * remaining time left in the timeout (but at least 1) if the condition + * was found to be true. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_timeout(var, condition, timeout) \ ({ \ long __ret = timeout; \ @@ -309,6 +405,23 @@ do { \ ___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \ schedule()) +/** + * wait_var_event_killable - wait for a variable to be updated and notified + * @var: the address of variable being waited on + * @condition: the condition to wait for + * + * Wait for a @condition to be true or a signal to be received, only + * re-checking the condition when a wake up is received for the given + * @var (an arbitrary kernel address which need not be directly related + * to the given condition, but usually is). + * + * This is similar to wait_var_event() but returns a value which is 0 if + * the condition became true, or %-ERESTARTSYS if a signal was received. + * + * The condition should normally use smp_load_acquire() or a similarly + * ordered access to ensure that any changes to memory made before the + * condition became true will be visible after the wait completes. + */ #define wait_var_event_interruptible(var, condition) \ ({ \ int __ret = 0; \ @@ -319,15 +432,122 @@ do { \ }) /** - * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit + * wait_var_event_any_lock - wait for a variable to be updated under a lock + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @lock: the object that is locked to protect updates to the variable + * @type: prefix on lock and unlock operations + * @state: waiting state, %TASK_UNINTERRUPTIBLE etc. + * + * Wait for a condition which can only be reliably tested while holding + * a lock. The variables assessed in the condition will normal be updated + * under the same lock, and the wake up should be signalled with + * wake_up_var_locked() under the same lock. + * + * This is similar to wait_var_event(), but assumes a lock is held + * while calling this function and while updating the variable. * + * This must be called while the given lock is held and the lock will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. The functions used to + * unlock and lock the object are constructed by appending _unlock and _lock + * to @type. + * + * Return %-ERESTARTSYS if a signal arrives which is allowed to interrupt + * the wait according to @state. + */ +#define wait_var_event_any_lock(var, condition, lock, type, state) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = ___wait_var_event(var, condition, state, 0, 0, \ + type ## _unlock(lock); \ + schedule(); \ + type ## _lock(lock)); \ + __ret; \ +}) + +/** + * wait_var_event_spinlock - wait for a variable to be updated under a spinlock + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @lock: the spinlock which protects updates to the variable + * + * Wait for a condition which can only be reliably tested while holding + * a spinlock. The variables assessed in the condition will normal be updated + * under the same spinlock, and the wake up should be signalled with + * wake_up_var_locked() under the same spinlock. + * + * This is similar to wait_var_event(), but assumes a spinlock is held + * while calling this function and while updating the variable. + * + * This must be called while the given lock is held and the lock will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. + */ +#define wait_var_event_spinlock(var, condition, lock) \ + wait_var_event_any_lock(var, condition, lock, spin, TASK_UNINTERRUPTIBLE) + +/** + * wait_var_event_mutex - wait for a variable to be updated under a mutex + * @var: the address of the variable being waited on + * @condition: condition to wait for + * @mutex: the mutex which protects updates to the variable + * + * Wait for a condition which can only be reliably tested while holding + * a mutex. The variables assessed in the condition will normal be + * updated under the same mutex, and the wake up should be signalled + * with wake_up_var_locked() under the same mutex. + * + * This is similar to wait_var_event(), but assumes a mutex is held + * while calling this function and while updating the variable. + * + * This must be called while the given mutex is held and the mutex will be + * dropped when schedule() is called to wait for a wake up, and will be + * reclaimed before testing the condition again. + */ +#define wait_var_event_mutex(var, condition, lock) \ + wait_var_event_any_lock(var, condition, lock, mutex, TASK_UNINTERRUPTIBLE) + +/** + * wake_up_var_protected - wake up waiters for a variable asserting that it is safe + * @var: the address of the variable being waited on + * @cond: the condition which afirms this is safe + * + * When waking waiters which use wait_var_event_any_lock() the waker must be + * holding the reelvant lock to avoid races. This version of wake_up_var() + * asserts that the relevant lock is held and so no barrier is needed. + * The @cond is only tested when CONFIG_LOCKDEP is enabled. + */ +#define wake_up_var_protected(var, cond) \ +do { \ + lockdep_assert(cond); \ + wake_up_var(var); \ +} while (0) + +/** + * wake_up_var_locked - wake up waiters for a variable while holding a spinlock or mutex + * @var: the address of the variable being waited on + * @lock: The spinlock or mutex what protects the variable + * + * Send a wake up for the given variable which should be waited for with + * wait_var_event_spinlock() or wait_var_event_mutex(). Unlike wake_up_var(), + * no extra barriers are needed as the locking provides sufficient sequencing. + */ +#define wake_up_var_locked(var, lock) \ + wake_up_var_protected(var, lockdep_is_held(lock)) + +/** + * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit * @bit: the bit of the word being waited on - * @word: the word being waited on, a kernel virtual address + * @word: the address containing the bit being waited on * - * You can use this helper if bitflags are manipulated atomically rather than - * non-atomically under a lock. + * The designated bit is cleared and any tasks waiting in wait_on_bit() + * or similar will be woken. This call has RELEASE semantics so that + * any changes to memory made before this call are guaranteed to be visible + * after the corresponding wait_on_bit() completes. */ -static inline void clear_and_wake_up_bit(int bit, void *word) +static inline void clear_and_wake_up_bit(int bit, unsigned long *word) { clear_bit_unlock(bit, word); /* See wake_up_bit() for which memory barrier you need to use. */ @@ -335,4 +555,64 @@ static inline void clear_and_wake_up_bit(int bit, void *word) wake_up_bit(word, bit); } +/** + * test_and_clear_wake_up_bit - clear a bit if it was set: wake up anyone waiting on that bit + * @bit: the bit of the word being waited on + * @word: the address of memory containing that bit + * + * If the bit is set and can be atomically cleared, any tasks waiting in + * wait_on_bit() or similar will be woken. This call has the same + * complete ordering semantics as test_and_clear_bit(). Any changes to + * memory made before this call are guaranteed to be visible after the + * corresponding wait_on_bit() completes. + * + * Returns %true if the bit was successfully set and the wake up was sent. + */ +static inline bool test_and_clear_wake_up_bit(int bit, unsigned long *word) +{ + if (!test_and_clear_bit(bit, word)) + return false; + /* no extra barrier required */ + wake_up_bit(word, bit); + return true; +} + +/** + * atomic_dec_and_wake_up - decrement an atomic_t and if zero, wake up waiters + * @var: the variable to dec and test + * + * Decrements the atomic variable and if it reaches zero, send a wake_up to any + * processes waiting on the variable. + * + * This function has the same complete ordering semantics as atomic_dec_and_test. + * + * Returns %true is the variable reaches zero and the wake up was sent. + */ + +static inline bool atomic_dec_and_wake_up(atomic_t *var) +{ + if (!atomic_dec_and_test(var)) + return false; + /* No extra barrier required */ + wake_up_var(var); + return true; +} + +/** + * store_release_wake_up - update a variable and send a wake_up + * @var: the address of the variable to be updated and woken + * @val: the value to store in the variable. + * + * Store the given value in the variable send a wake up to any tasks + * waiting on the variable. All necessary barriers are included to ensure + * the task calling wait_var_event() sees the new value and all values + * written to memory before this call. + */ +#define store_release_wake_up(var, val) \ +do { \ + smp_store_release(var, val); \ + smp_mb(); \ + wake_up_var(var); \ +} while (0) + #endif /* _LINUX_WAIT_BIT_H */ diff --git a/include/linux/wireless.h b/include/linux/wireless.h index e6e34d74dda0..03e5d3fe226d 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -21,8 +21,7 @@ struct compat_iw_point { __u16 length; __u16 flags; }; -#endif -#ifdef CONFIG_COMPAT + struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ @@ -49,5 +48,5 @@ struct __compat_iw_event { #define IW_EV_COMPAT_POINT_LEN \ (IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \ IW_EV_COMPAT_POINT_OFF) -#endif +#endif /* CONFIG_COMPAT */ #endif /* _LINUX_WIRELESS_H */ diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 3275470b5531..10751c8e5e6a 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -34,7 +34,7 @@ struct wmi_device { * * Cast a struct device to a struct wmi_device. */ -#define to_wmi_device(device) container_of(device, struct wmi_device, dev) +#define to_wmi_device(device) container_of_const(device, struct wmi_device, dev) extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 method_id, @@ -56,6 +56,7 @@ u8 wmidev_instance_count(struct wmi_device *wdev); * @no_singleton: Driver can be instantiated multiple times * @probe: Callback for device binding * @remove: Callback for device unbinding + * @shutdown: Callback for device shutdown * @notify: Callback for receiving WMI events * * This represents WMI drivers which handle WMI devices. @@ -68,9 +69,18 @@ struct wmi_driver { int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); + void (*shutdown)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); }; +/** + * to_wmi_driver() - Helper macro to cast a driver to a wmi_driver + * @drv: driver struct + * + * Cast a struct device_driver to a struct wmi_driver. + */ +#define to_wmi_driver(drv) container_of_const(drv, struct wmi_driver, driver) + extern int __must_check __wmi_driver_register(struct wmi_driver *driver, struct module *owner); extern void wmi_driver_unregister(struct wmi_driver *driver); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4eb8f9563136..b0dc957c3e56 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -412,7 +412,7 @@ enum wq_flags { }; enum wq_consts { - WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, @@ -507,6 +507,47 @@ void workqueue_softirq_dead(unsigned int cpu); __printf(1, 4) struct workqueue_struct * alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); +#ifdef CONFIG_LOCKDEP +/** + * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags + * @max_active: max in-flight work items, 0 for default + * @lockdep_map: user-defined lockdep_map + * @...: args for @fmt + * + * Same as alloc_workqueue but with the a user-define lockdep_map. Useful for + * workqueues created with the same purpose and to avoid leaking a lockdep_map + * on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 5) struct workqueue_struct * +alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, + struct lockdep_map *lockdep_map, ...); + +/** + * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with + * user-defined lockdep_map + * + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) + * @lockdep_map: user-defined lockdep_map + * @args: args for @fmt + * + * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. + * Useful for workqueues created with the same purpose and to avoid leaking a + * lockdep_map on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +#define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ + alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \ + 1, lockdep_map, ##args) +#endif + /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1a54676d843a..d11b903c2edb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,6 +79,9 @@ struct writeback_control { */ struct swap_iocb **swap_plug; + /* Target list for splitting a large folio */ + struct list_head *list; + /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; @@ -200,7 +203,8 @@ void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { - wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); + wait_var_event(inode_state_wait_address(inode, __I_NEW), + !(READ_ONCE(inode->i_state) & I_NEW)); } #ifdef CONFIG_CGROUP_WRITEBACK @@ -209,15 +213,12 @@ static inline void wait_on_inode(struct inode *inode) #include <linux/bio.h> void __inode_attach_wb(struct inode *inode, struct folio *folio); -void wbc_attach_and_unlock_inode(struct writeback_control *wbc, - struct inode *inode) - __releases(&inode->i_lock); void wbc_detach_inode(struct writeback_control *wbc); -void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page, +void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); -void cgroup_writeback_umount(void); +void cgroup_writeback_umount(struct super_block *sb); bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** @@ -250,22 +251,8 @@ static inline void inode_detach_wb(struct inode *inode) } } -/** - * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite - * @wbc: writeback_control of interest - * @inode: target inode - * - * This function is to be used by __filemap_fdatawrite_range(), which is an - * alternative entry point into writeback code, and first ensures @inode is - * associated with a bdi_writeback and attaches it to @wbc. - */ -static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, - struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode_attach_wb(inode, NULL); - wbc_attach_and_unlock_inode(wbc, inode); -} +void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, + struct inode *inode); /** * wbc_init_bio - writeback specific initializtion of bio @@ -299,13 +286,6 @@ static inline void inode_detach_wb(struct inode *inode) { } -static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, - struct inode *inode) - __releases(&inode->i_lock) -{ - spin_unlock(&inode->i_lock); -} - static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { @@ -320,11 +300,11 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) } static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, - struct page *page, size_t bytes) + struct folio *folio, size_t bytes) { } -static inline void cgroup_writeback_umount(void) +static inline void cgroup_writeback_umount(struct super_block *sb) { } diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index bb763085479a..45ff6f7a872b 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -65,6 +65,16 @@ struct ww_acquire_ctx { #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; + /** + * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex. + * + * lockdep requires the lockdep_map for the first locked ww_mutex + * in a ww transaction to remain in memory until all ww_mutexes of + * the transaction have been unlocked. Ensure this by keeping a + * fake locked ww_mutex lockdep map between ww_acquire_init() and + * ww_acquire_fini(). + */ + struct lockdep_map first_lock_dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; @@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); + lockdep_init_map_wait(&ctx->first_lock_dep_map, ww_class->mutex_name, + &ww_class->mutex_key, 0, LD_WAIT_SLEEP); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); + mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; @@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC + mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 170fdee6339c..79c781875c09 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -17,6 +17,8 @@ * @WWAN_PORT_FIREHOSE: XML based command protocol * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems * @WWAN_PORT_FASTBOOT: Fastboot protocol control + * @WWAN_PORT_ADB: ADB protocol control + * @WWAN_PORT_MIPC: MTK MIPC diagnostic interface * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -30,6 +32,8 @@ enum wwan_port_type { WWAN_PORT_FIREHOSE, WWAN_PORT_XMMRPC, WWAN_PORT_FASTBOOT, + WWAN_PORT_ADB, + WWAN_PORT_MIPC, /* Add new port types above this line */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d20051865800..86b0d47984a1 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -19,6 +19,10 @@ #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> +/* List of all open_how "versions". */ +#define XATTR_ARGS_SIZE_VER0 16 /* sizeof first published struct */ +#define XATTR_ARGS_SIZE_LATEST XATTR_ARGS_SIZE_VER0 + struct inode; struct dentry; diff --git a/include/linux/xz.h b/include/linux/xz.h index 7285ca5d56e9..58ae1d746c6f 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -1,11 +1,10 @@ +/* SPDX-License-Identifier: 0BSD */ + /* * XZ decompressor * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> - * - * This file has been put into the public domain. - * You can do whatever you want with this file. */ #ifndef XZ_H @@ -19,11 +18,6 @@ # include <stdint.h> #endif -/* In Linux, this is used to make extern functions static when needed. */ -#ifndef XZ_EXTERN -# define XZ_EXTERN extern -#endif - /** * enum xz_mode - Operation mode * @@ -143,7 +137,7 @@ struct xz_buf { size_t out_size; }; -/** +/* * struct xz_dec - Opaque type to hold the XZ decoder state */ struct xz_dec; @@ -191,7 +185,7 @@ struct xz_dec; * ready to be used with xz_dec_run(). If memory allocation fails, * xz_dec_init() returns NULL. */ -XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); +struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); /** * xz_dec_run() - Run the XZ decoder @@ -211,7 +205,7 @@ XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max); * get that amount valid data from the beginning of the stream. You must use * the multi-call decoder if you don't want to uncompress the whole stream. */ -XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); +enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); /** * xz_dec_reset() - Reset an already allocated decoder state @@ -224,32 +218,38 @@ XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b); * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in * multi-call mode. */ -XZ_EXTERN void xz_dec_reset(struct xz_dec *s); +void xz_dec_reset(struct xz_dec *s); /** * xz_dec_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_init(). If s is NULL, * this function does nothing. */ -XZ_EXTERN void xz_dec_end(struct xz_dec *s); +void xz_dec_end(struct xz_dec *s); -/* - * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. - * See xz_dec_microlzma_alloc() below for details. +/** + * DOC: MicroLZMA decompressor + * + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. **In most cases one needs the XZ APIs above instead.** * - * These functions aren't used or available in preboot code and thus aren't - * marked with XZ_EXTERN. This avoids warnings about static functions that - * are never defined. + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. */ -/** + +/* * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state */ struct xz_dec_microlzma; /** * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder - * @mode XZ_SINGLE or XZ_PREALLOC - * @dict_size LZMA dictionary size. This must be at least 4 KiB and + * @mode: XZ_SINGLE or XZ_PREALLOC + * @dict_size: LZMA dictionary size. This must be at least 4 KiB and * at most 3 GiB. * * In contrast to xz_dec_init(), this function only allocates the memory @@ -262,40 +262,30 @@ struct xz_dec_microlzma; * On success, xz_dec_microlzma_alloc() returns a pointer to * struct xz_dec_microlzma. If memory allocation fails or * dict_size is invalid, NULL is returned. - * - * The compressed format supported by this decoder is a raw LZMA stream - * whose first byte (always 0x00) has been replaced with bitwise-negation - * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is - * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. - * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream - * marker must not be used. The unused values are reserved for future use. - * This MicroLZMA header format was created for use in EROFS but may be used - * by others too. */ -extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, - uint32_t dict_size); +struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); /** * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state - * @s Decoder state allocated using xz_dec_microlzma_alloc() - * @comp_size Compressed size of the input stream - * @uncomp_size Uncompressed size of the input stream. A value smaller + * @s: Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size: Compressed size of the input stream + * @uncomp_size: Uncompressed size of the input stream. A value smaller * than the real uncompressed size of the input stream can * be specified if uncomp_size_is_exact is set to false. * uncomp_size can never be set to a value larger than the * expected real uncompressed size because it would eventually * result in XZ_DATA_ERROR. - * @uncomp_size_is_exact This is an int instead of bool to avoid + * @uncomp_size_is_exact: This is an int instead of bool to avoid * requiring stdbool.h. This should normally be set to true. * When this is set to false, error detection is weaker. */ -extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, - uint32_t comp_size, uint32_t uncomp_size, - int uncomp_size_is_exact); +void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, + uint32_t uncomp_size, int uncomp_size_is_exact); /** * xz_dec_microlzma_run() - Run the MicroLZMA decoder - * @s Decoder state initialized using xz_dec_microlzma_reset() + * @s: Decoder state initialized using xz_dec_microlzma_reset() * @b: Input and output buffers * * This works similarly to xz_dec_run() with a few important differences. @@ -329,15 +319,14 @@ extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, * may be changed normally like with XZ_PREALLOC. This way input data can be * provided from non-contiguous memory. */ -extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, - struct xz_buf *b); +enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, struct xz_buf *b); /** * xz_dec_microlzma_end() - Free the memory allocated for the decoder state * @s: Decoder state allocated using xz_dec_microlzma_alloc(). * If s is NULL, this function does nothing. */ -extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); +void xz_dec_microlzma_end(struct xz_dec_microlzma *s); /* * Standalone build (userspace build or in-kernel build for boot time use) @@ -358,13 +347,13 @@ extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); * This must be called before any other xz_* function to initialize * the CRC32 lookup table. */ -XZ_EXTERN void xz_crc32_init(void); +void xz_crc32_init(void); /* * Update CRC32 value using the polynomial from IEEE-802.3. To start a new * calculation, the third argument must be zero. To continue the calculation, * the previously returned value is passed as the third argument. */ -XZ_EXTERN uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); +uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc); #endif #endif diff --git a/include/linux/zstd.h b/include/linux/zstd.h index 113408eef6ec..b2c7cf310c8f 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -77,6 +77,30 @@ int zstd_min_clevel(void); */ int zstd_max_clevel(void); +/** + * zstd_default_clevel() - default compression level + * + * Return: Default compression level. + */ +int zstd_default_clevel(void); + +/** + * struct zstd_custom_mem - custom memory allocation + */ +typedef ZSTD_customMem zstd_custom_mem; + +/** + * struct zstd_dict_load_method - Dictionary load method. + * See zstd_lib.h. + */ +typedef ZSTD_dictLoadMethod_e zstd_dict_load_method; + +/** + * struct zstd_dict_content_type - Dictionary context type. + * See zstd_lib.h. + */ +typedef ZSTD_dictContentType_e zstd_dict_content_type; + /* ====== Parameter Selection ====== */ /** @@ -136,6 +160,19 @@ typedef ZSTD_parameters zstd_parameters; zstd_parameters zstd_get_params(int level, unsigned long long estimated_src_size); + +/** + * zstd_get_cparams() - returns zstd_compression_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. + * @dict_size: Dictionary size. + * + * Return: The selected zstd_compression_parameters. + */ +zstd_compression_parameters zstd_get_cparams(int level, + unsigned long long estimated_src_size, size_t dict_size); + /* ====== Single-pass Compression ====== */ typedef ZSTD_CCtx zstd_cctx; @@ -180,6 +217,71 @@ zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, const void *src, size_t src_size, const zstd_parameters *parameters); +/** + * zstd_create_cctx_advanced() - Create compression context + * @custom_mem: Custom allocator. + * + * Return: NULL on error, pointer to compression context otherwise. + */ +zstd_cctx *zstd_create_cctx_advanced(zstd_custom_mem custom_mem); + +/** + * zstd_free_cctx() - Free compression context + * @cdict: Pointer to compression context. + * + * Return: Always 0. + */ +size_t zstd_free_cctx(zstd_cctx* cctx); + +/** + * struct zstd_cdict - Compression dictionary. + * See zstd_lib.h. + */ +typedef ZSTD_CDict zstd_cdict; + +/** + * zstd_create_cdict_byreference() - Create compression dictionary + * @dict: Pointer to dictionary buffer. + * @dict_size: Size of the dictionary buffer. + * @dict_load_method: Dictionary load method. + * @dict_content_type: Dictionary content type. + * @custom_mem: Memory allocator. + * + * Note, this uses @dict by reference (ZSTD_dlm_byRef), so it should be + * free before zstd_cdict is destroyed. + * + * Return: NULL on error, pointer to compression dictionary + * otherwise. + */ +zstd_cdict *zstd_create_cdict_byreference(const void *dict, size_t dict_size, + zstd_compression_parameters cparams, + zstd_custom_mem custom_mem); + +/** + * zstd_free_cdict() - Free compression dictionary + * @cdict: Pointer to compression dictionary. + * + * Return: Always 0. + */ +size_t zstd_free_cdict(zstd_cdict* cdict); + +/** + * zstd_compress_using_cdict() - compress src into dst using a dictionary + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @cdict: The dictionary to be used. + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_using_cdict(zstd_cctx *cctx, void *dst, + size_t dst_capacity, const void *src, size_t src_size, + const zstd_cdict *cdict); + /* ====== Single-pass Decompression ====== */ typedef ZSTD_DCtx zstd_dctx; @@ -220,6 +322,71 @@ zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, const void *src, size_t src_size); +/** + * struct zstd_ddict - Decompression dictionary. + * See zstd_lib.h. + */ +typedef ZSTD_DDict zstd_ddict; + +/** + * zstd_create_ddict_byreference() - Create decompression dictionary + * @dict: Pointer to dictionary buffer. + * @dict_size: Size of the dictionary buffer. + * @dict_load_method: Dictionary load method. + * @dict_content_type: Dictionary content type. + * @custom_mem: Memory allocator. + * + * Note, this uses @dict by reference (ZSTD_dlm_byRef), so it should be + * free before zstd_ddict is destroyed. + * + * Return: NULL on error, pointer to decompression dictionary + * otherwise. + */ +zstd_ddict *zstd_create_ddict_byreference(const void *dict, size_t dict_size, + zstd_custom_mem custom_mem); +/** + * zstd_free_ddict() - Free decompression dictionary + * @dict: Pointer to the dictionary. + * + * Return: Always 0. + */ +size_t zstd_free_ddict(zstd_ddict *ddict); + +/** + * zstd_create_dctx_advanced() - Create decompression context + * @custom_mem: Custom allocator. + * + * Return: NULL on error, pointer to decompression context otherwise. + */ +zstd_dctx *zstd_create_dctx_advanced(zstd_custom_mem custom_mem); + +/** + * zstd_free_dctx() -- Free decompression context + * @dctx: Pointer to decompression context. + * Return: Always 0. + */ +size_t zstd_free_dctx(zstd_dctx *dctx); + +/** + * zstd_decompress_using_ddict() - decompress src into dst using a dictionary + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. + * @ddict: The dictionary to be used. + * + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_decompress_using_ddict(zstd_dctx *dctx, + void *dst, size_t dst_capacity, const void *src, size_t src_size, + const zstd_ddict *ddict); + + /* ====== Streaming Buffers ====== */ /** diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 6cecb4a4f68b..d961ead91bf1 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -7,23 +7,21 @@ struct lruvec; -extern atomic_t zswap_stored_pages; +extern atomic_long_t zswap_stored_pages; #ifdef CONFIG_ZSWAP struct zswap_lruvec_state { /* - * Number of pages in zswap that should be protected from the shrinker. - * This number is an estimate of the following counts: + * Number of swapped in pages from disk, i.e not found in the zswap pool. * - * a) Recent page faults. - * b) Recent insertion to the zswap LRU. This includes new zswap stores, - * as well as recent zswap LRU rotations. - * - * These pages are likely to be warm, and might incur IO if the are written - * to swap. + * This is consumed and subtracted from the lru size in + * zswap_shrinker_count() to penalize past overshrinking that led to disk + * swapins. The idea is that had we considered this many more pages in the + * LRU active/protected and not written them back, we would not have had to + * swapped them in. */ - atomic_long_t nr_zswap_protected; + atomic_long_t nr_disk_swapins; }; unsigned long zswap_total_pages(void); |
