| field | value |
|---|---|
| author | Russell King (Oracle) <rmk+kernel@armlinux.org.uk>, 2024-06-10 14:03:21 +0300 |
| committer | Russell King (Oracle) <rmk+kernel@armlinux.org.uk>, 2024-06-10 14:03:21 +0300 |
| commit | 594ce0b8a998aa4d05827cd7c0d0dcec9a1e3ae2 (patch) |
| tree | 070bd60a8fda15e5f47339d3f6888a0fe2ca6fe9 /include/linux |
| parent | 616501eccb58615f8f352a29239ea6c6fc5e6546 (diff) |
| parent | e3cf20e5c68df604315ab30bdbe15dc8a5da556b (diff) |
| download | linux-594ce0b8a998aa4d05827cd7c0d0dcec9a1e3ae2.tar.xz |
Merge topic branches 'clkdev' and 'fixes' into for-linus
Diffstat (limited to 'include/linux'): 538 files changed, 13875 insertions, 5961 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b7165e52b3c6..28c3fb2bef0d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -242,9 +242,6 @@ static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
 	return gicc->flags & ACPI_MADT_ENABLED;
 }
 
-/* the following numa functions are architecture-dependent */
-void acpi_numa_slit_init (struct acpi_table_slit *slit);
-
 #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH)
 void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa);
 #else
@@ -267,8 +264,6 @@ static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
 #endif
 
-int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
-
 #ifndef PHYS_CPUID_INVALID
 typedef u32 phys_cpuid_t;
 #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1)
@@ -421,7 +416,7 @@ extern char *wmi_get_acpi_device_uid(const char *guid);
 extern char acpi_video_backlight_string[];
 extern long acpi_is_video_device(acpi_handle handle);
-extern int acpi_blacklisted(void);
+
 extern void acpi_osi_setup(char *str);
 extern bool acpi_osi_is_win8(void);
 
@@ -573,9 +568,13 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPCV2_SUPPORT			0x00000040
 #define OSC_SB_PCLPI_SUPPORT			0x00000080
 #define OSC_SB_OSLPI_SUPPORT			0x00000100
+#define OSC_SB_FAST_THERMAL_SAMPLING_SUPPORT	0x00000200
+#define OSC_SB_OVER_16_PSTATES_SUPPORT		0x00000400
+#define OSC_SB_GED_SUPPORT			0x00000800
 #define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT		0x00001000
-#define OSC_SB_GENERIC_INITIATOR_SUPPORT	0x00002000
+#define OSC_SB_IRQ_RESOURCE_SOURCE_SUPPORT	0x00002000
 #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE		0x00004000
+#define OSC_SB_GENERIC_INITIATOR_SUPPORT	0x00020000
 #define OSC_SB_NATIVE_USB4_SUPPORT		0x00040000
 #define OSC_SB_PRM_SUPPORT			0x00200000
 #define OSC_SB_FFH_OPR_SUPPORT			0x00400000
@@ -1170,6 +1169,7 @@ static inline void acpi_ec_set_gpe_wake_mask(u8 action) {}
 #endif
 
 #ifdef CONFIG_ACPI
+char *acpi_handle_path(acpi_handle handle);
 __printf(3, 4)
 void acpi_handle_printk(const char *level, acpi_handle handle,
 			const char *fmt, ...);
@@ -1232,7 +1232,7 @@ bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
 				struct acpi_resource_gpio **agpio);
 bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
 			       struct acpi_resource_gpio **agpio);
-int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name, int index,
+int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index,
 				  bool *wake_capable);
 #else
 static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
@@ -1245,7 +1245,7 @@ static inline bool acpi_gpio_get_io_resource(struct acpi_resource *ares,
 {
 	return false;
 }
-static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *name,
+static inline int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id,
 						int index, bool *wake_capable)
 {
 	return -ENXIO;
@@ -1258,10 +1258,10 @@ static inline int acpi_dev_gpio_irq_wake_get(struct acpi_device *adev, int index
 	return acpi_dev_gpio_irq_wake_get_by(adev, NULL, index, wake_capable);
 }
 
-static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name,
+static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *con_id,
 					   int index)
 {
-	return acpi_dev_gpio_irq_wake_get_by(adev, name, index, NULL);
+	return acpi_dev_gpio_irq_wake_get_by(adev, con_id, index, NULL);
 }
 
 static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
@@ -1547,4 +1547,25 @@ static inline void acpi_use_parent_companion(struct device *dev)
 	ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent));
 }
 
+#ifdef CONFIG_ACPI_HMAT
+int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
+				   enum access_coordinate_class access);
+#else
+static inline int hmat_update_target_coordinates(int nid,
+						 struct access_coordinate *coord,
+						 enum access_coordinate_class access)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+bool acpi_node_backed_by_real_pxm(int nid);
+#else
+static inline bool acpi_node_backed_by_real_pxm(int nid)
+{
+	return false;
+}
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 1cb65592c95d..d4ed5622cf2b 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -39,7 +39,7 @@ void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode,
 void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode,
 		       struct list_head *head);
 /* IOMMU interface */
-int iort_dma_get_ranges(struct device *dev, u64 *size);
+int iort_dma_get_ranges(struct device *dev, u64 *limit);
 int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
 void iort_iommu_get_resv_regions(struct device *dev, struct list_head *head);
 phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
@@ -55,7 +55,7 @@ void iort_get_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head)
 static inline
 void iort_put_rmr_sids(struct fwnode_handle *iommu_fwnode, struct list_head *head) { }
 /* IOMMU interface */
-static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
+static inline int iort_dma_get_ranges(struct device *dev, u64 *limit)
 { return -ENODEV; }
 static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
 { return -ENODEV; }
diff --git a/include/linux/aer.h b/include/linux/aer.h
index ae0fae70d4bd..4b97f38f3fcf 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -18,11 +18,8 @@
 
 struct pci_dev;
 
-struct aer_header_log_regs {
-	u32 dw0;
-	u32 dw1;
-	u32 dw2;
-	u32 dw3;
+struct pcie_tlp_log {
+	u32 dw[4];
 };
 
 struct aer_capability_regs {
@@ -33,13 +30,15 @@ struct aer_capability_regs {
 	u32 cor_status;
 	u32 cor_mask;
 	u32 cap_control;
-	struct aer_header_log_regs header_log;
+	struct pcie_tlp_log header_log;
 	u32 root_command;
 	u32 root_status;
 	u16 cor_err_source;
 	u16 uncor_err_source;
 };
 
+int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log);
+
 #if defined(CONFIG_PCIEAER)
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 int pcie_aer_is_native(struct pci_dev *dev);
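The new struct pcie_tlp_log replaces four individually named DWORD fields with an array, which lets consumers iterate over the logged TLP header instead of naming each register. A minimal sketch of a consumer, assuming the AER capability offset is already known; dump_aer_tlp_log is a hypothetical helper and error handling is trimmed:

```c
#include <linux/aer.h>
#include <linux/pci.h>

/* Hypothetical helper: print the four TLP header log DWORDs of a device
 * that just reported an AER error. `aer_cap` is the offset of the AER
 * extended capability, e.g. from
 * pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR).
 */
static void dump_aer_tlp_log(struct pci_dev *dev, int aer_cap)
{
	struct pcie_tlp_log log;
	int i;

	if (pcie_read_tlp_log(dev, aer_cap + PCI_ERR_HEADER_LOG, &log))
		return;

	for (i = 0; i < 4; i++)
		pci_err(dev, "TLP Header DW%d: %#010x\n", i, log.dw[i]);
}
```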
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
new file mode 100644
index 000000000000..abd24016a900
--- /dev/null
+++ b/include/linux/alloc_tag.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * allocation tagging
+ */
+#ifndef _LINUX_ALLOC_TAG_H
+#define _LINUX_ALLOC_TAG_H
+
+#include <linux/bug.h>
+#include <linux/codetag.h>
+#include <linux/container_of.h>
+#include <linux/preempt.h>
+#include <asm/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/smp.h>
+#include <linux/static_key.h>
+#include <linux/irqflags.h>
+
+struct alloc_tag_counters {
+	u64 bytes;
+	u64 calls;
+};
+
+/*
+ * An instance of this structure is created in a special ELF section at every
+ * allocation callsite. At runtime, the special section is treated as an
+ * array of these. The embedded codetag utilizes the codetag framework.
+ */
+struct alloc_tag {
+	struct codetag ct;
+	struct alloc_tag_counters __percpu *counters;
+} __aligned(8);
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+
+#define CODETAG_EMPTY	((void *)1)
+
+static inline bool is_codetag_empty(union codetag_ref *ref)
+{
+	return ref->ct == CODETAG_EMPTY;
+}
+
+static inline void set_codetag_empty(union codetag_ref *ref)
+{
+	if (ref)
+		ref->ct = CODETAG_EMPTY;
+}
+
+#else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
+static inline bool is_codetag_empty(union codetag_ref *ref) { return false; }
+static inline void set_codetag_empty(union codetag_ref *ref) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+struct codetag_bytes {
+	struct codetag *ct;
+	s64 bytes;
+};
+
+size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep);
+
+static inline struct alloc_tag *ct_to_alloc_tag(struct codetag *ct)
+{
+	return container_of(ct, struct alloc_tag, ct);
+}
+
+#ifdef ARCH_NEEDS_WEAK_PER_CPU
+/*
+ * When percpu variables are required to be defined as weak, static percpu
+ * variables can't be used inside a function (see comments for
+ * DECLARE_PER_CPU_SECTION). Instead we will account all module allocations
+ * to a single counter.
+ */
+DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag);
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)					\
+	static struct alloc_tag _alloc_tag __used __aligned(8)		\
+	__section("alloc_tags") = {					\
+		.ct = CODE_TAG_INIT,					\
+		.counters = &_shared_alloc_tag };
+
+#else /* ARCH_NEEDS_WEAK_PER_CPU */
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)					\
+	static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \
+	static struct alloc_tag _alloc_tag __used __aligned(8)		\
+	__section("alloc_tags") = {					\
+		.ct = CODE_TAG_INIT,					\
+		.counters = &_alloc_tag_cntr };
+
+#endif /* ARCH_NEEDS_WEAK_PER_CPU */
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+			 mem_alloc_profiling_key);
+
+static inline bool mem_alloc_profiling_enabled(void)
+{
+	return static_branch_maybe(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+				   &mem_alloc_profiling_key);
+}
+
+static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
+{
+	struct alloc_tag_counters v = { 0, 0 };
+	struct alloc_tag_counters *counter;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		counter = per_cpu_ptr(tag->counters, cpu);
+		v.bytes += counter->bytes;
+		v.calls += counter->calls;
+	}
+
+	return v;
+}
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
+{
+	WARN_ONCE(ref && ref->ct,
+		  "alloc_tag was not cleared (got tag for %s:%u)\n",
+		  ref->ct->filename, ref->ct->lineno);
+
+	WARN_ONCE(!tag, "current->alloc_tag not set");
+}
+
+static inline void alloc_tag_sub_check(union codetag_ref *ref)
+{
+	WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
+}
+#else
+static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
+static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
+#endif
+
+/* Caller should verify both ref and tag to be valid */
+static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+{
+	ref->ct = &tag->ct;
+	/*
+	 * We need to increment the call counter every time we have a new
+	 * allocation or when we split a large allocation into smaller ones.
+	 * Each new reference for every sub-allocation needs to increment the
+	 * call counter because when we free each part the counter will be
+	 * decremented.
+	 */
+	this_cpu_inc(tag->counters->calls);
+}
+
+static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+{
+	alloc_tag_add_check(ref, tag);
+	if (!ref || !tag)
+		return;
+
+	__alloc_tag_ref_set(ref, tag);
+}
+
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
+{
+	alloc_tag_add_check(ref, tag);
+	if (!ref || !tag)
+		return;
+
+	__alloc_tag_ref_set(ref, tag);
+	this_cpu_add(tag->counters->bytes, bytes);
+}
+
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+{
+	struct alloc_tag *tag;
+
+	alloc_tag_sub_check(ref);
+	if (!ref || !ref->ct)
+		return;
+
+	if (is_codetag_empty(ref)) {
+		ref->ct = NULL;
+		return;
+	}
+
+	tag = ct_to_alloc_tag(ref->ct);
+
+	this_cpu_sub(tag->counters->bytes, bytes);
+	this_cpu_dec(tag->counters->calls);
+
+	ref->ct = NULL;
+}
+
+#define alloc_tag_record(p)	((p) = current->alloc_tag)
+
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+#define DEFINE_ALLOC_TAG(_alloc_tag)
+static inline bool mem_alloc_profiling_enabled(void) { return false; }
+static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag,
+				 size_t bytes) {}
+static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes) {}
+#define alloc_tag_record(p)	do {} while (0)
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
+#define alloc_hooks_tag(_tag, _do_alloc)				\
+({									\
+	struct alloc_tag * __maybe_unused _old = alloc_tag_save(_tag);	\
+	typeof(_do_alloc) _res = _do_alloc;				\
+	alloc_tag_restore(_tag, _old);					\
+	_res;								\
+})
+
+#define alloc_hooks(_do_alloc)						\
+({									\
+	DEFINE_ALLOC_TAG(_alloc_tag);					\
+	alloc_hooks_tag(&_alloc_tag, _do_alloc);			\
+})
+
+#endif /* _LINUX_ALLOC_TAG_H */
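The alloc_hooks() wrapper is what attributes allocations to callsites: expanding it at a call site defines a static struct alloc_tag in the "alloc_tags" section and brackets the real allocation with a save/restore of the current tag. A minimal sketch of how an allocation API can be wrapped, assuming CONFIG_MEM_ALLOC_PROFILING; my_alloc and my_alloc_noprof are hypothetical names (the kernel applies this shape to its real allocators):

```c
/* The profiled implementation gets a _noprof name... */
void *my_alloc_noprof(size_t size);	/* hypothetical allocator */

/* ...and the public name becomes a macro, so every callsite expands
 * its own struct alloc_tag and accounts bytes/calls against it. */
#define my_alloc(size)	alloc_hooks(my_alloc_noprof(size))
```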
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index c60a6a14638c..958a55bcc708 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -112,11 +112,18 @@ extern struct bus_type amba_bustype;
 #define amba_get_drvdata(d)	dev_get_drvdata(&d->dev)
 #define amba_set_drvdata(d,p)	dev_set_drvdata(&d->dev, p)
 
+/*
+ * use a macro to avoid include chaining to get THIS_MODULE
+ */
+#define amba_driver_register(drv) \
+	__amba_driver_register(drv, THIS_MODULE)
+
 #ifdef CONFIG_ARM_AMBA
-int amba_driver_register(struct amba_driver *);
+int __amba_driver_register(struct amba_driver *, struct module *);
 void amba_driver_unregister(struct amba_driver *);
 #else
-static inline int amba_driver_register(struct amba_driver *drv)
+static inline int __amba_driver_register(struct amba_driver *drv,
+					 struct module *owner)
 {
 	return -EINVAL;
 }
diff --git a/include/linux/amba/pl022.h b/include/linux/amba/pl022.h
index 9bf58aac0df2..d7b07d0311e1 100644
--- a/include/linux/amba/pl022.h
+++ b/include/linux/amba/pl022.h
@@ -16,6 +16,7 @@
 #ifndef _SSP_PL022_H
 #define _SSP_PL022_H
 
+#include <linux/dmaengine.h>
 #include <linux/types.h>
 
 /**
@@ -224,6 +225,7 @@ struct dma_chan;
  * struct pl022_ssp_master - device.platform_data for SPI controller devices.
  * @bus_id: identifier for this bus
  * @enable_dma: if true enables DMA driven transfers.
+ * @dma_filter: callback filter for dma_request_channel.
  * @dma_rx_param: parameter to locate an RX DMA channel.
  * @dma_tx_param: parameter to locate a TX DMA channel.
  * @autosuspend_delay: delay in ms following transfer completion before the
@@ -235,7 +237,7 @@ struct dma_chan;
 struct pl022_ssp_controller {
 	u16 bus_id;
 	u8 enable_dma:1;
-	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	dma_filter_fn dma_filter;
 	void *dma_rx_param;
 	void *dma_tx_param;
 	int autosuspend_delay;
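Switching the member to dma_filter_fn reuses the dmaengine core's callback type instead of re-declaring an identical function-pointer signature. A sketch of board code filling in the platform data; pl022_dma_filter and the channel cookies are hypothetical, and the filter semantics are the usual dma_request_channel() contract:

```c
#include <linux/amba/pl022.h>
#include <linux/dmaengine.h>

/* Hypothetical filter: accept the channel whose private data matches
 * the cookie passed in dma_rx_param/dma_tx_param. */
static bool pl022_dma_filter(struct dma_chan *chan, void *filter_param)
{
	return chan->private == filter_param;
}

static struct pl022_ssp_controller ssp0_plat_data = {
	.bus_id		= 0,
	.enable_dma	= 1,
	.dma_filter	= pl022_dma_filter,	/* now a dma_filter_fn */
	.dma_rx_param	= "ssp0_rx",		/* hypothetical cookies */
	.dma_tx_param	= "ssp0_tx",
};
```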
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index dc7ed2f46886..2b90c48a6a87 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -85,8 +85,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn,
 		u64 *value);
 struct amd_iommu *get_amd_iommu(unsigned int idx);
 
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-int amd_iommu_snp_enable(void);
+#ifdef CONFIG_KVM_AMD_SEV
+int amd_iommu_snp_disable(void);
+#else
+static inline int amd_iommu_snp_disable(void) { return 0; }
 #endif
 
 #endif /* _ASM_X86_AMD_IOMMU_H */
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 6ad02ad9c7b4..d58fc022ec46 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -39,19 +39,31 @@ struct amd_aperf_mperf {
  * @cppc_req_cached: cached performance request hints
  * @highest_perf: the maximum performance an individual processor may reach,
  *		  assuming ideal conditions
+ *		  For platforms that do not support the preferred core feature,
+ *		  highest_perf may be configured to 166 or 255; to avoid the max
+ *		  frequency being calculated wrongly, we take the fixed value as
+ *		  the highest_perf.
  * @nominal_perf: the maximum sustained performance level of the processor,
  *		  assuming ideal operating conditions
  * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
 *			   savings are achieved
  * @lowest_perf: the absolute lowest performance level of the processor
-* @max_freq: the frequency that mapped to highest_perf
-* @min_freq: the frequency that mapped to lowest_perf
-* @nominal_freq: the frequency that mapped to nominal_perf
-* @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
+* @prefcore_ranking: the preferred core ranking; a higher value indicates a
+*		  higher priority.
+* @min_limit_perf: Cached value of the performance corresponding to policy->min
+* @max_limit_perf: Cached value of the performance corresponding to policy->max
+* @min_limit_freq: Cached value of policy->min (in khz)
+* @max_limit_freq: Cached value of policy->max (in khz)
+* @max_freq: the frequency (in khz) that mapped to highest_perf
+* @min_freq: the frequency (in khz) that mapped to lowest_perf
+* @nominal_freq: the frequency (in khz) that mapped to nominal_perf
+* @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
  * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
  * @prev: Last Aperf/Mperf/tsc count value read from register
-* @freq: current cpu frequency value
+* @freq: current cpu frequency value (in khz)
  * @boost_supported: check whether the Processor or SBIOS supports boost mode
+* @hw_prefcore: check whether HW supports the preferred core feature.
+*		  Only when hw_prefcore and the early prefcore param are both
+*		  true does the AMD P-State driver support the preferred core
+*		  feature.
  * @epp_policy: Last saved policy used to set energy-performance preference
  * @epp_cached: Cached CPPC energy-performance preference value
  * @policy: Cpufreq policy value
@@ -70,6 +82,7 @@ struct amd_cpudata {
 	u32 nominal_perf;
 	u32 lowest_nonlinear_perf;
 	u32 lowest_perf;
+	u32 prefcore_ranking;
 	u32 min_limit_perf;
 	u32 max_limit_perf;
 	u32 min_limit_freq;
@@ -85,6 +98,7 @@ struct amd_cpudata {
 	u64 freq;
 	bool boost_supported;
+	bool hw_prefcore;
 
 	/* EPP feature related attributes*/
 	s16 epp_policy;
@@ -114,4 +128,10 @@ static const char * const amd_pstate_mode_string[] = {
 	[AMD_PSTATE_GUIDED] = "guided",
 	NULL,
 };
+
+struct quirk_entry {
+	u32 nominal_freq;
+	u32 lowest_freq;
+};
+
 #endif /* _LINUX_AMD_PSTATE_H */
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 93a5f16d03f3..edef565c2a1a 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -9,12 +9,17 @@
 #ifndef _LINUX_ANON_INODES_H
 #define _LINUX_ANON_INODES_H
 
+#include <linux/types.h>
+
 struct file_operations;
 struct inode;
 
 struct file *anon_inode_getfile(const char *name,
 				const struct file_operations *fops,
 				void *priv, int flags);
+struct file *anon_inode_getfile_fmode(const char *name,
+				const struct file_operations *fops,
+				void *priv, int flags, fmode_t f_mode);
 struct file *anon_inode_create_getfile(const char *name,
 				const struct file_operations *fops,
 				void *priv, int flags,
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index a63d61ca55af..b721f360d759 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -60,14 +60,14 @@ void topology_scale_freq_tick(void);
 void topology_set_scale_freq_source(struct scale_freq_data *data, const struct cpumask *cpus);
 void topology_clear_scale_freq_source(enum scale_freq_source source, const struct cpumask *cpus);
 
-DECLARE_PER_CPU(unsigned long, thermal_pressure);
+DECLARE_PER_CPU(unsigned long, hw_pressure);
 
-static inline unsigned long topology_get_thermal_pressure(int cpu)
+static inline unsigned long topology_get_hw_pressure(int cpu)
 {
-	return per_cpu(thermal_pressure, cpu);
+	return per_cpu(hw_pressure, cpu);
 }
 
-void topology_update_thermal_pressure(const struct cpumask *cpus,
+void topology_update_hw_pressure(const struct cpumask *cpus,
 				      unsigned long capped_freq);
 
 struct cpu_topology {
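The thermal_pressure per-CPU variable becomes the more general hw_pressure: firmware or hardware can cap CPU frequency for reasons other than thermals, and the scheduler only cares that capacity is reduced. A sketch of both sides of the interface, using only the symbols declared above; the surrounding driver is hypothetical:

```c
#include <linux/arch_topology.h>
#include <linux/cpumask.h>

/* Producer side: a (hypothetical) driver reports that the CPUs in `cpus`
 * are now capped at `capped_khz`; the core turns the cap into pressure
 * relative to each CPU's maximum frequency. */
static void report_freq_cap(const struct cpumask *cpus, unsigned long capped_khz)
{
	topology_update_hw_pressure(cpus, capped_khz);
}

/* Consumer side: read back the pressure currently accounted to a CPU. */
static unsigned long cpu_pressure(int cpu)
{
	return topology_get_hw_pressure(cpu);
}
```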
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index 3d0fde57ba90..c82d56768101 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -126,6 +126,7 @@
 /* FFA Bus/Device/Driver related */
 struct ffa_device {
 	u32 id;
+	u32 properties;
 	int vm_id;
 	bool mode_32bit;
 	uuid_t uuid;
@@ -209,7 +210,7 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; }
 #define module_ffa_driver(__ffa_driver)	\
 	module_driver(__ffa_driver, ffa_register, ffa_unregister)
 
-extern struct bus_type ffa_bus_type;
+extern const struct bus_type ffa_bus_type;
 
 /* FFA transport related */
 struct ffa_partition_info {
@@ -221,12 +222,29 @@ struct ffa_partition_info {
 #define FFA_PARTITION_DIRECT_SEND	BIT(1)
 /* partition can send and receive indirect messages. */
 #define FFA_PARTITION_INDIRECT_MSG	BIT(2)
+/* partition can receive notifications */
+#define FFA_PARTITION_NOTIFICATION_RECV	BIT(3)
 /* partition runs in the AArch64 execution state. */
 #define FFA_PARTITION_AARCH64_EXEC	BIT(8)
 	u32 properties;
 	u32 uuid[4];
 };
 
+static inline
+bool ffa_partition_check_property(struct ffa_device *dev, u32 property)
+{
+	return dev->properties & property;
+}
+
+#define ffa_partition_supports_notify_recv(dev)	\
+	ffa_partition_check_property(dev, FFA_PARTITION_NOTIFICATION_RECV)
+
+#define ffa_partition_supports_indirect_msg(dev)	\
+	ffa_partition_check_property(dev, FFA_PARTITION_INDIRECT_MSG)
+
+#define ffa_partition_supports_direct_recv(dev)	\
+	ffa_partition_check_property(dev, FFA_PARTITION_DIRECT_RECV)
+
 /* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP} which pass data via registers */
 struct ffa_send_direct_data {
 	unsigned long data0; /* w3/x3 */
@@ -236,6 +254,14 @@ struct ffa_send_direct_data {
 	unsigned long data4; /* w7/x7 */
 };
 
+struct ffa_indirect_msg_hdr {
+	u32 flags;
+	u32 res0;
+	u32 offset;
+	u32 send_recv_id;
+	u32 size;
+};
+
 struct ffa_mem_region_addr_range {
 	/* The base IPA of the constituent memory region, aligned to 4 kiB */
 	u64 address;
@@ -396,6 +422,7 @@ struct ffa_msg_ops {
 	void (*mode_32bit_set)(struct ffa_device *dev);
 	int (*sync_send_receive)(struct ffa_device *dev,
 				 struct ffa_send_direct_data *data);
+	int (*indirect_send)(struct ffa_device *dev, void *buf, size_t sz);
 };
 
 struct ffa_mem_ops {
diff --git a/include/linux/async.h b/include/linux/async.h
index 33c9ff4afb49..19b778d08600 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -120,4 +120,5 @@ extern void async_synchronize_cookie(async_cookie_t cookie);
 extern void async_synchronize_cookie_domain(async_cookie_t cookie,
 					    struct async_domain *domain);
 extern bool current_is_async(void);
+extern void async_init(void);
 #endif
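With the partition's properties cached in struct ffa_device, a driver can gate its message path on what the partition advertises instead of probing at runtime. A minimal sketch of a hypothetical FF-A driver probe using the helpers defined above:

```c
#include <linux/arm_ffa.h>
#include <linux/errno.h>

static int my_ffa_probe(struct ffa_device *ffa_dev)	/* hypothetical driver */
{
	/* Prefer the new indirect path when the partition advertises it... */
	if (ffa_partition_supports_indirect_msg(ffa_dev))
		return 0;

	/* ...otherwise require that it can at least receive direct messages. */
	if (!ffa_partition_supports_direct_recv(ffa_dev))
		return -EOPNOTSUPP;

	return 0;
}
```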
diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index 5e95faa959c4..956bcba5dbf2 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -2005,6 +2005,7 @@ raw_atomic_xchg_relaxed(atomic_t *v, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere.
  *
@@ -2033,6 +2034,7 @@ raw_atomic_cmpxchg(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere.
  *
@@ -2061,6 +2063,7 @@ raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere.
  *
@@ -2088,6 +2091,7 @@ raw_atomic_cmpxchg_release(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere.
  *
@@ -2112,7 +2116,8 @@ raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere.
  *
@@ -2145,7 +2150,8 @@ raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere.
  *
@@ -2178,7 +2184,8 @@ raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere.
  *
@@ -2210,7 +2217,8 @@ raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere.
  *
@@ -2403,6 +2411,7 @@ raw_atomic_add_negative_relaxed(int i, atomic_t *v)
  * @u: int value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere.
  *
@@ -2432,6 +2441,7 @@ raw_atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * @u: int value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_add_unless() elsewhere.
  *
@@ -2452,6 +2462,7 @@ raw_atomic_add_unless(atomic_t *v, int a, int u)
  * @v: pointer to atomic_t
  *
  * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere.
  *
@@ -2472,6 +2483,7 @@ raw_atomic_inc_not_zero(atomic_t *v)
  * @v: pointer to atomic_t
  *
  * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere.
  *
@@ -2499,6 +2511,7 @@ raw_atomic_inc_unless_negative(atomic_t *v)
  * @v: pointer to atomic_t
  *
  * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere.
  *
@@ -2526,6 +2539,7 @@ raw_atomic_dec_unless_positive(atomic_t *v)
  * @v: pointer to atomic_t
  *
  * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere.
  *
@@ -4117,6 +4131,7 @@ raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere.
  *
@@ -4145,6 +4160,7 @@ raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere.
  *
@@ -4173,6 +4189,7 @@ raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere.
  *
@@ -4200,6 +4217,7 @@ raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere.
  *
@@ -4224,7 +4242,8 @@ raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere.
  *
@@ -4257,7 +4276,8 @@ raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere.
  *
@@ -4290,7 +4310,8 @@ raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere.
  *
@@ -4322,7 +4343,8 @@ raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere.
  *
@@ -4515,6 +4537,7 @@ raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
  * @u: s64 value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere.
  *
@@ -4544,6 +4567,7 @@ raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
  * @u: s64 value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere.
  *
@@ -4564,6 +4588,7 @@ raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
  * @v: pointer to atomic64_t
  *
  * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere.
  *
@@ -4584,6 +4609,7 @@ raw_atomic64_inc_not_zero(atomic64_t *v)
  * @v: pointer to atomic64_t
  *
  * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere.
  *
@@ -4611,6 +4637,7 @@ raw_atomic64_inc_unless_negative(atomic64_t *v)
  * @v: pointer to atomic64_t
  *
  * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere.
  *
@@ -4638,6 +4665,7 @@ raw_atomic64_dec_unless_positive(atomic64_t *v)
  * @v: pointer to atomic64_t
  *
  * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere.
  *
@@ -4662,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// eec048affea735b8464f58e6d96992101f8f85f1
+// 14850c0b0db20c62fdc78ccd1d42b98b88d76331
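The clarified wording matters most for the try_cmpxchg family, whose point is that @old is updated on failure so the caller can loop without re-reading the variable. A small worked example of the canonical loop (a sketch; the atomic_t and the capped add are illustrative):

```c
#include <linux/atomic.h>
#include <linux/minmax.h>

/* Add `a` to `v`, but never past `limit`. On a failed try_cmpxchg the
 * current value of `v` is already in `old`, so the loop needs no extra
 * atomic_read(); per the docs above, the failed attempt is relaxed. */
static int add_capped(atomic_t *v, int a, int limit)
{
	int old = atomic_read(v);
	int new;

	do {
		new = min(old + a, limit);
	} while (!atomic_try_cmpxchg(v, &old, new));

	return new;
}
```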
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index 54d7bbe0aeaa..debd487fe971 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -1182,6 +1182,7 @@ atomic_xchg_relaxed(atomic_t *v, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_cmpxchg() there.
  *
@@ -1202,6 +1203,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_acquire() there.
  *
@@ -1221,6 +1223,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_release() there.
  *
@@ -1241,6 +1244,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_cmpxchg_relaxed() there.
  *
@@ -1260,7 +1264,8 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg() there.
  *
@@ -1282,7 +1287,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_acquire() there.
  *
@@ -1303,7 +1309,8 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_release() there.
  *
@@ -1325,7 +1332,8 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
  * @new: int value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_try_cmpxchg_relaxed() there.
  *
@@ -1475,6 +1483,7 @@ atomic_add_negative_relaxed(int i, atomic_t *v)
  * @u: int value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_fetch_add_unless() there.
  *
@@ -1495,6 +1504,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * @u: int value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_add_unless() there.
  *
@@ -1513,6 +1523,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
  * @v: pointer to atomic_t
  *
  * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_inc_not_zero() there.
  *
@@ -1531,6 +1542,7 @@ atomic_inc_not_zero(atomic_t *v)
  * @v: pointer to atomic_t
  *
  * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_inc_unless_negative() there.
  *
@@ -1549,6 +1561,7 @@ atomic_inc_unless_negative(atomic_t *v)
  * @v: pointer to atomic_t
  *
  * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_dec_unless_positive() there.
  *
@@ -1567,6 +1580,7 @@ atomic_dec_unless_positive(atomic_t *v)
  * @v: pointer to atomic_t
  *
  * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_dec_if_positive() there.
  *
@@ -2746,6 +2760,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg() there.
  *
@@ -2766,6 +2781,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_acquire() there.
  *
@@ -2785,6 +2801,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_release() there.
  *
@@ -2805,6 +2822,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_cmpxchg_relaxed() there.
  *
@@ -2824,7 +2842,8 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg() there.
  *
@@ -2846,7 +2865,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_acquire() there.
  *
@@ -2867,7 +2887,8 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_release() there.
  *
@@ -2889,7 +2910,8 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
  * @new: s64 value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_try_cmpxchg_relaxed() there.
  *
@@ -3039,6 +3061,7 @@ atomic64_add_negative_relaxed(s64 i, atomic64_t *v)
  * @u: s64 value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_fetch_add_unless() there.
  *
@@ -3059,6 +3082,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
  * @u: s64 value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_add_unless() there.
  *
@@ -3077,6 +3101,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
  * @v: pointer to atomic64_t
  *
  * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_inc_not_zero() there.
  *
@@ -3095,6 +3120,7 @@ atomic64_inc_not_zero(atomic64_t *v)
  * @v: pointer to atomic64_t
  *
  * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_inc_unless_negative() there.
  *
@@ -3113,6 +3139,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
  * @v: pointer to atomic64_t
  *
  * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_dec_unless_positive() there.
  *
@@ -3131,6 +3158,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
  * @v: pointer to atomic64_t
  *
  * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic64_dec_if_positive() there.
  *
@@ -4310,6 +4338,7 @@ atomic_long_xchg_relaxed(atomic_long_t *v, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg() there.
  *
@@ -4330,6 +4359,7 @@ atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_acquire() there.
  *
@@ -4349,6 +4379,7 @@ atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_release() there.
  *
@@ -4369,6 +4400,7 @@ atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_cmpxchg_relaxed() there.
  *
@@ -4388,7 +4420,8 @@ atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg() there.
  *
@@ -4410,7 +4443,8 @@ atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_acquire() there.
  *
@@ -4431,7 +4465,8 @@ atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_release() there.
  *
@@ -4453,7 +4488,8 @@ atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_try_cmpxchg_relaxed() there.
  *
@@ -4603,6 +4639,7 @@ atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
  * @u: long value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_fetch_add_unless() there.
  *
@@ -4623,6 +4660,7 @@ atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
  * @u: long value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_add_unless() there.
  *
@@ -4641,6 +4679,7 @@ atomic_long_add_unless(atomic_long_t *v, long a, long u)
  * @v: pointer to atomic_long_t
  *
  * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_inc_not_zero() there.
  *
@@ -4659,6 +4698,7 @@ atomic_long_inc_not_zero(atomic_long_t *v)
  * @v: pointer to atomic_long_t
  *
  * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_inc_unless_negative() there.
  *
@@ -4677,6 +4717,7 @@ atomic_long_inc_unless_negative(atomic_long_t *v)
  * @v: pointer to atomic_long_t
  *
  * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_dec_unless_positive() there.
  *
@@ -4695,6 +4736,7 @@ atomic_long_dec_unless_positive(atomic_long_t *v)
  * @v: pointer to atomic_long_t
  *
  * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Unsafe to use in noinstr code; use raw_atomic_long_dec_if_positive() there.
  *
@@ -5008,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 2cc4bc990fef44d3836ec108f11b610f3f438184
+// ce5b65e0f1f8a276268b667194581d24bed219d4
diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
index c82947170ddc..3ef844b3ab8a 100644
--- a/include/linux/atomic/atomic-long.h
+++ b/include/linux/atomic/atomic-long.h
@@ -1352,6 +1352,7 @@ raw_atomic_long_xchg_relaxed(atomic_long_t *v, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_cmpxchg() elsewhere.
  *
@@ -1374,6 +1375,7 @@ raw_atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_cmpxchg_acquire() elsewhere.
  *
@@ -1396,6 +1398,7 @@ raw_atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_cmpxchg_release() elsewhere.
  *
@@ -1418,6 +1421,7 @@ raw_atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_cmpxchg_relaxed() elsewhere.
  *
@@ -1440,7 +1444,8 @@ raw_atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with full ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg() elsewhere.
  *
@@ -1463,7 +1468,8 @@ raw_atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with acquire ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_acquire() elsewhere.
  *
@@ -1486,7 +1492,8 @@ raw_atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with release ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_release() elsewhere.
  *
@@ -1509,7 +1516,8 @@ raw_atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
  * @new: long value to assign
  *
  * If (@v == @old), atomically updates @v to @new with relaxed ordering.
- * Otherwise, updates @old to the current value of @v.
+ * Otherwise, @v is not modified, @old is updated to the current value of @v,
+ * and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_try_cmpxchg_relaxed() elsewhere.
  *
@@ -1677,6 +1685,7 @@ raw_atomic_long_add_negative_relaxed(long i, atomic_long_t *v)
  * @u: long value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_fetch_add_unless() elsewhere.
  *
@@ -1699,6 +1708,7 @@ raw_atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
  * @u: long value to compare with
  *
  * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_add_unless() elsewhere.
  *
@@ -1719,6 +1729,7 @@ raw_atomic_long_add_unless(atomic_long_t *v, long a, long u)
  * @v: pointer to atomic_long_t
  *
  * If (@v != 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_inc_not_zero() elsewhere.
  *
@@ -1739,6 +1750,7 @@ raw_atomic_long_inc_not_zero(atomic_long_t *v)
  * @v: pointer to atomic_long_t
  *
  * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_inc_unless_negative() elsewhere.
  *
@@ -1759,6 +1771,7 @@ raw_atomic_long_inc_unless_negative(atomic_long_t *v)
  * @v: pointer to atomic_long_t
  *
  * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_dec_unless_positive() elsewhere.
  *
@@ -1779,6 +1792,7 @@ raw_atomic_long_dec_unless_positive(atomic_long_t *v)
  * @v: pointer to atomic_long_t
  *
  * If (@v > 0), atomically updates @v to (@v - 1) with full ordering.
+ * Otherwise, @v is not modified and relaxed ordering is provided.
  *
  * Safe to use in noinstr code; prefer atomic_long_dec_if_positive() elsewhere.
  *
@@ -1795,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_LONG_H */
-// 4ef23f98c73cff96d239896175fd26b10b88899e
+// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1a97277f99b1..8e7af9a03b41 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -38,7 +38,6 @@ struct backing_dev_info *bdi_alloc(int node_id);
 
 void wb_start_background_writeback(struct bdi_writeback *wb);
 void wb_workfn(struct work_struct *work);
-void wb_wakeup_delayed(struct bdi_writeback *wb);
 
 void wb_wait_for_completion(struct wb_completion *done);
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
index 3f1fe1774f1b..4b61b0e57720 100644
--- a/include/linux/backing-file.h
+++ b/include/linux/backing-file.h
@@ -22,6 +22,9 @@ struct backing_file_ctx {
 struct file *backing_file_open(const struct path *user_path, int flags,
 			       const struct path *real_path,
 			       const struct cred *cred);
+struct file *backing_tmpfile_open(const struct path *user_path, int flags,
+				  const struct path *real_parentpath,
+				  umode_t mode, const struct cred *cred);
 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
 			       struct kiocb *iocb, int flags,
 			       struct backing_file_ctx *ctx);
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 614653e07e3a..19a1c0e22629 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -13,6 +13,7 @@
 #include <linux/fb.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
+#include <linux/types.h>
 
 /**
  * enum backlight_update_reason - what method was used to update backlight
@@ -110,7 +111,6 @@ enum backlight_scale {
 };
 
 struct backlight_device;
-struct fb_info;
 
 /**
  * struct backlight_ops - backlight operations
@@ -160,18 +160,18 @@ struct backlight_ops {
 	int (*get_brightness)(struct backlight_device *);
 
 	/**
-	 * @check_fb: Check the framebuffer device.
+	 * @controls_device: Check against the display device
 	 *
-	 * Check if given framebuffer device is the one bound to this backlight.
-	 * This operation is optional and if not implemented it is assumed that the
-	 * fbdev is always the one bound to the backlight.
+	 * Check if the backlight controls the given display device. This
+	 * operation is optional and if not implemented it is assumed that
+	 * the display is always the one controlled by the backlight.
	 *
	 * RETURNS:
	 *
-	 * If info is NULL or the info matches the fbdev bound to the backlight, return true.
-	 * If info does not match the fbdev bound to the backlight, return false.
+	 * If display_dev is NULL or display_dev matches the device controlled by
+	 * the backlight, return true. Otherwise return false.
	 */
-	int (*check_fb)(struct backlight_device *bd, struct fb_info *info);
+	bool (*controls_device)(struct backlight_device *bd, struct device *display_dev);
 };
 
 /**
@@ -219,25 +219,6 @@ struct backlight_properties {
 	int power;
 
 	/**
-	 * @fb_blank: The power state from the FBIOBLANK ioctl.
-	 *
-	 * When the FBIOBLANK ioctl is called @fb_blank is set to the
-	 * blank parameter and the update_status() operation is called.
-	 *
-	 * When the backlight device is enabled @fb_blank is set
-	 * to FB_BLANK_UNBLANK. When the backlight device is disabled
-	 * @fb_blank is set to FB_BLANK_POWERDOWN.
-	 *
-	 * Backlight drivers should avoid using this property. It has been
-	 * replaced by state & BL_CORE_FBLANK (although most drivers should
-	 * use backlight_is_blank() as the preferred means to get the blank
-	 * state).
-	 *
-	 * fb_blank is deprecated and will be removed.
-	 */
-	int fb_blank;
-
-	/**
	 * @type: The type of backlight supported.
	 *
	 * The backlight type allows userspace to make appropriate
@@ -366,7 +347,6 @@ static inline int backlight_enable(struct backlight_device *bd)
 		return 0;
 
 	bd->props.power = FB_BLANK_UNBLANK;
-	bd->props.fb_blank = FB_BLANK_UNBLANK;
 	bd->props.state &= ~BL_CORE_FBBLANK;
 
 	return backlight_update_status(bd);
@@ -382,7 +362,6 @@ static inline int backlight_disable(struct backlight_device *bd)
 		return 0;
 
 	bd->props.power = FB_BLANK_POWERDOWN;
-	bd->props.fb_blank = FB_BLANK_POWERDOWN;
 	bd->props.state |= BL_CORE_FBBLANK;
 
 	return backlight_update_status(bd);
@@ -395,15 +374,13 @@ static inline int backlight_disable(struct backlight_device *bd)
  *
  * Display is expected to be blank if any of these is true::
  *
  *   1) if power is not UNBLANK
- *   2) if fb_blank is not UNBLANK
- *   3) if state indicate BLANK or SUSPENDED
+ *   2) if state indicates BLANK or SUSPENDED
  *
  * Returns true if display is expected to be blank, false otherwise.
  */
 static inline bool backlight_is_blank(const struct backlight_device *bd)
 {
 	return bd->props.power != FB_BLANK_UNBLANK ||
-	       bd->props.fb_blank != FB_BLANK_UNBLANK ||
	       bd->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK);
 }
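check_fb() tied the backlight core to fbdev; controls_device() asks the same question against a plain struct device, so fbdev and DRM drivers can answer it uniformly. A sketch of how a driver might implement the new hook, assuming it stored the display's device pointer at probe time; my_backlight and its fields are hypothetical:

```c
#include <linux/backlight.h>
#include <linux/device.h>

struct my_backlight {			/* hypothetical driver state */
	struct device *display_dev;	/* display bound at probe time */
};

static bool my_bl_controls_device(struct backlight_device *bd,
				  struct device *display_dev)
{
	struct my_backlight *bl = bl_get_data(bd);

	/* NULL means "any display": keep the historic permissive default. */
	return !display_dev || display_dev == bl->display_dev;
}

static const struct backlight_ops my_bl_ops = {
	.controls_device = my_bl_controls_device,
	/* .update_status, .get_brightness, ... */
};
```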
*/ static inline bool backlight_is_blank(const struct backlight_device *bd) { return bd->props.power != FB_BLANK_UNBLANK || - bd->props.fb_blank != FB_BLANK_UNBLANK || bd->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 875d792bffff..d5379548d684 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -615,6 +615,13 @@ static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) bl->tail = bl2->tail; } +static inline void bio_list_merge_init(struct bio_list *bl, + struct bio_list *bl2) +{ + bio_list_merge(bl, bl2); + bio_list_init(bl2); +} + static inline void bio_list_merge_head(struct bio_list *bl, struct bio_list *bl2) { @@ -824,5 +831,9 @@ static inline void bio_clear_polled(struct bio *bio) struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); +struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new); + +struct bio *blk_alloc_discard_bio(struct block_device *bdev, + sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask); #endif /* __LINUX_BIO_H */ diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index ebfa12f69501..63928f173223 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -66,7 +66,8 @@ _pfx "mask is not constant"); \ BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ - ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + ~((_mask) >> __bf_shf(_mask)) & \ + (0 + (_val)) : 0, \ _pfx "value too large for the field"); \ BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ __bf_cast_unsigned(_reg, ~0ull), \ diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 99451431e4d6..8c4768c44a01 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,6 +6,7 @@ #include <linux/align.h> #include <linux/bitops.h> +#include <linux/cleanup.h> #include <linux/errno.h> #include <linux/find.h> #include <linux/limits.h> @@ -54,6 +55,7 @@ struct device; * bitmap_full(src, nbits) Are all bits set in *src? 
* bitmap_weight(src, nbits) Hamming Weight: number set bits * bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap + * bitmap_weight_andnot(src1, src2, nbits) Hamming Weight of andnot'ed bitmap * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area @@ -62,6 +64,8 @@ struct device; * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest * bitmap_replace(dst, old, new, mask, nbits) *dst = (*old & ~(*mask)) | (*new & *mask) + * bitmap_scatter(dst, src, mask, nbits) *dst = map(dense, sparse)(src) + * bitmap_gather(dst, src, mask, nbits) *dst = map(sparse, dense)(src) * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap @@ -79,6 +83,10 @@ struct device; * bitmap_to_arr64(buf, src, nbits) Copy nbits from buf to u64[] dst * bitmap_get_value8(map, start) Get 8bit value from map at start * bitmap_set_value8(map, value, start) Set 8bit value to map at start + * bitmap_read(map, start, nbits) Read an nbits-sized value from + * map at start + * bitmap_write(map, value, start, nbits) Write an nbits-sized value to + * map at start * * Note, bitmap_zero() and bitmap_fill() operate over the region of * unsigned longs, that is, bits behind bitmap till the unsigned long @@ -127,6 +135,8 @@ unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node); unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node); void bitmap_free(const unsigned long *bitmap); +DEFINE_FREE(bitmap, unsigned long *, if (_T) bitmap_free(_T)) + /* Managed variants of the above. 
*/ unsigned long *devm_bitmap_alloc(struct device *dev, unsigned int nbits, gfp_t flags); @@ -169,6 +179,8 @@ bool __bitmap_subset(const unsigned long *bitmap1, unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); unsigned int __bitmap_weight_and(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); +unsigned int __bitmap_weight_andnot(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -214,9 +226,11 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) +#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) + static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + unsigned int len = bitmap_size(nbits); if (small_const_nbits(nbits)) *dst = 0; @@ -226,7 +240,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + unsigned int len = bitmap_size(nbits); if (small_const_nbits(nbits)) *dst = ~0UL; @@ -237,7 +251,7 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + unsigned int len = bitmap_size(nbits); if (small_const_nbits(nbits)) *dst = *src; @@ -425,6 +439,15 @@ unsigned long bitmap_weight_and(const unsigned long *src1, return __bitmap_weight_and(src1, src2, nbits); } +static __always_inline +unsigned long bitmap_weight_andnot(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight_andnot(src1, src2, nbits); +} + static __always_inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { @@ -487,6 +510,107 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } +/** + * bitmap_scatter - Scatter a bitmap according to the given mask + * @dst: scattered bitmap + * @src: gathered bitmap + * @mask: mask representing bits to assign to in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Scatters bitmap with sequential bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x005a, with @mask = 0x1313, @dst will be 0x0302. + * + * Or in binary form + * @src @mask @dst + * 0000000001011010 0001001100010011 0000001100000010 + * + * (Bits 0, 1, 2, 3, 4, 5 are copied to the bits 0, 1, 4, 8, 9, 12) + * + * A more 'visual' description of the operation:: + * + * src: 0000000001011010 + * |||||| + * +------+||||| + * | +----+|||| + * | |+----+||| + * | || +-+|| + * | || | || + * mask: ...v..vv...v..vv + * ...0..11...0..10 + * dst: 0000001100000010 + * + * A relationship exists between bitmap_scatter() and bitmap_gather(). + * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. + * See bitmap_scatter() for details related to this relationship. 
+ */ +static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(bit, dst, test_bit(n++, src)); +} + +/** + * bitmap_gather - Gather a bitmap according to the given mask + * @dst: gathered bitmap + * @src: scattered bitmap + * @mask: mask representing bits to extract from in the scattered bitmap + * @nbits: number of bits in each of these bitmaps + * + * Gathers bitmap with sparse bits according to the given @mask. + * + * Example: + * If @src bitmap = 0x0302, with @mask = 0x1313, @dst will be 0x001a. + * + * Or in binary form + * @src @mask @dst + * 0000001100000010 0001001100010011 0000000000011010 + * + * (Bits 0, 1, 4, 8, 9, 12 are copied to the bits 0, 1, 2, 3, 4, 5) + * + * A more 'visual' description of the operation:: + * + * mask: ...v..vv...v..vv + * src: 0000001100000010 + * ^ ^^ ^ 0 + * | || | 10 + * | || > 010 + * | |+--> 1010 + * | +--> 11010 + * +----> 011010 + * dst: 0000000000011010 + * + * A relationship exists between bitmap_gather() and bitmap_scatter(). See + * bitmap_scatter() for the details of the scatter operation. + * Suppose scattered is computed using bitmap_scatter(scattered, src, mask, n). + * The operation bitmap_gather(result, scattered, mask, n) leads to a result + * equal or equivalent to src. + * + * The result can be 'equivalent' because bitmap_scatter() and bitmap_gather() + * are not bijective. + * The result and src values are equivalent in the sense that a call to + * bitmap_scatter(res, src, mask, n) and a call to + * bitmap_scatter(res, result, mask, n) will lead to the same res value. + */ +static inline void bitmap_gather(unsigned long *dst, const unsigned long *src, + const unsigned long *mask, unsigned int nbits) +{ + unsigned int n = 0; + unsigned int bit; + + bitmap_zero(dst, nbits); + + for_each_set_bit(bit, mask, nbits) + __assign_bit(n++, dst, test_bit(bit, src)); +} + static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) @@ -604,38 +728,83 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) } /** - * bitmap_get_value8 - get an 8-bit value within a memory region + * bitmap_read - read a value of n-bits from the memory region * @map: address to the bitmap memory region - * @start: bit offset of the 8-bit value; must be a multiple of 8 + * @start: bit offset of the n-bit value + * @nbits: size of value in bits, nonzero, up to BITS_PER_LONG * - * Returns the 8-bit value located at the @start bit offset within the @src - * memory region. + * Returns: value of @nbits bits located at the @start bit offset within the + * @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return + * value is undefined. 
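The two kernel-docs above can be round-tripped directly. A small demo under the same example values (0x005a scattered through mask 0x1313, then gathered back); the function name is illustrative only:

#include <linux/bitmap.h>

/* Round-trip of the kernel-doc example; this demo function is hypothetical. */
static void bitmap_scatter_gather_demo(void)
{
	DECLARE_BITMAP(src, 16)  = { 0x005a };
	DECLARE_BITMAP(mask, 16) = { 0x1313 };
	DECLARE_BITMAP(scattered, 16);
	DECLARE_BITMAP(gathered, 16);

	bitmap_scatter(scattered, src, mask, 16);	/* scattered == 0x0302 */
	bitmap_gather(gathered, scattered, mask, 16);	/* gathered  == 0x001a */

	/*
	 * gathered != src here: src bit 6 lies outside the six mask bits
	 * and is dropped. Scattering 'gathered' again would reproduce
	 * 'scattered', which is the "equal or equivalent" caveat above.
	 */
}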
*/ -static inline unsigned long bitmap_get_value8(const unsigned long *map, - unsigned long start) +static inline unsigned long bitmap_read(const unsigned long *map, + unsigned long start, + unsigned long nbits) { - const size_t index = BIT_WORD(start); - const unsigned long offset = start % BITS_PER_LONG; + size_t index = BIT_WORD(start); + unsigned long offset = start % BITS_PER_LONG; + unsigned long space = BITS_PER_LONG - offset; + unsigned long value_low, value_high; - return (map[index] >> offset) & 0xFF; + if (unlikely(!nbits || nbits > BITS_PER_LONG)) + return 0; + + if (space >= nbits) + return (map[index] >> offset) & BITMAP_LAST_WORD_MASK(nbits); + + value_low = map[index] & BITMAP_FIRST_WORD_MASK(start); + value_high = map[index + 1] & BITMAP_LAST_WORD_MASK(start + nbits); + return (value_low >> offset) | (value_high << space); } /** - * bitmap_set_value8 - set an 8-bit value within a memory region + * bitmap_write - write n-bit value within a memory region * @map: address to the bitmap memory region - * @value: the 8-bit value; values wider than 8 bits may clobber bitmap - * @start: bit offset of the 8-bit value; must be a multiple of 8 + * @value: value to write, clamped to nbits + * @start: bit offset of the n-bit value + * @nbits: size of value in bits, nonzero, up to BITS_PER_LONG. + * + * bitmap_write() behaves as-if implemented as @nbits calls of __assign_bit(), + * i.e. bits beyond @nbits are ignored: + * + * for (bit = 0; bit < nbits; bit++) + * __assign_bit(start + bit, bitmap, val & BIT(bit)); + * + * For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed. */ -static inline void bitmap_set_value8(unsigned long *map, unsigned long value, - unsigned long start) +static inline void bitmap_write(unsigned long *map, unsigned long value, + unsigned long start, unsigned long nbits) { - const size_t index = BIT_WORD(start); - const unsigned long offset = start % BITS_PER_LONG; - - map[index] &= ~(0xFFUL << offset); + size_t index; + unsigned long offset; + unsigned long space; + unsigned long mask; + bool fit; + + if (unlikely(!nbits || nbits > BITS_PER_LONG)) + return; + + mask = BITMAP_LAST_WORD_MASK(nbits); + value &= mask; + offset = start % BITS_PER_LONG; + space = BITS_PER_LONG - offset; + fit = space >= nbits; + index = BIT_WORD(start); + + map[index] &= (fit ? 
(~(mask << offset)) : ~BITMAP_FIRST_WORD_MASK(start)); map[index] |= value << offset; + if (fit) + return; + + map[index + 1] &= BITMAP_FIRST_WORD_MASK(start + nbits); + map[index + 1] |= (value >> space); } +#define bitmap_get_value8(map, start) \ + bitmap_read(map, start, BITS_PER_BYTE) +#define bitmap_set_value8(map, value, start) \ + bitmap_write(map, value, start, BITS_PER_BYTE) + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2ba557e067fe..46d4bdc634c0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -8,19 +8,14 @@ #include <uapi/linux/kernel.h> -/* Set bits in the first 'n' bytes when loaded from memory */ -#ifdef __LITTLE_ENDIAN -# define aligned_byte_mask(n) ((1UL << 8*(n))-1) -#else -# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) -#endif - #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) #define BITS_TO_BYTES(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) +#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_BYTE) + extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); @@ -80,6 +75,7 @@ __check_bitop_pr(__test_and_set_bit); __check_bitop_pr(__test_and_clear_bit); __check_bitop_pr(__test_and_change_bit); __check_bitop_pr(test_bit); +__check_bitop_pr(test_bit_acquire); #undef __check_bitop_pr @@ -200,7 +196,7 @@ static __always_inline __s64 sign_extend64(__u64 value, int index) return (__s64)(value << shift) >> shift; } -static inline unsigned fls_long(unsigned long l) +static inline unsigned int fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); @@ -236,7 +232,7 @@ static inline int get_count_order_long(unsigned long l) * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. */ -static inline unsigned long __ffs64(u64 word) +static inline unsigned int __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) @@ -252,18 +248,12 @@ static inline unsigned long __ffs64(u64 word) * @word: The word to search * @n: Bit to find */ -static inline unsigned long fns(unsigned long word, unsigned int n) +static inline unsigned int fns(unsigned long word, unsigned int n) { - unsigned int bit; - - while (word) { - bit = __ffs(word); - if (n-- == 0) - return bit; - __clear_bit(bit, &word); - } + while (word && n--) + word &= word - 1; - return BITS_PER_LONG; + return word ? __ffs(word) : BITS_PER_LONG; } /** @@ -272,23 +262,11 @@ static inline unsigned long fns(unsigned long word, unsigned int n) * @addr: the address to start counting from * @value: the value to assign */ -static __always_inline void assign_bit(long nr, volatile unsigned long *addr, - bool value) -{ - if (value) - set_bit(nr, addr); - else - clear_bit(nr, addr); -} +#define assign_bit(nr, addr, value) \ + ((value) ? set_bit((nr), (addr)) : clear_bit((nr), (addr))) -static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, - bool value) -{ - if (value) - __set_bit(nr, addr); - else - __clear_bit(nr, addr); -} +#define __assign_bit(nr, addr, value) \ + ((value) ? 
__set_bit((nr), (addr)) : __clear_bit((nr), (addr))) /** * __ptr_set_bit - Set bit in a pointer's value diff --git a/include/linux/bits.h b/include/linux/bits.h index 7c0cf5031abe..0eb24d21aac2 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <vdso/bits.h> +#include <uapi/linux/bits.h> #include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) @@ -30,15 +31,8 @@ #define GENMASK_INPUT_CHECK(h, l) 0 #endif -#define __GENMASK(h, l) \ - (((~UL(0)) - (UL(1) << (l)) + 1) & \ - (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) - -#define __GENMASK_ULL(h, l) \ - (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ - (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 378b2459efe2..e253e7bd0d17 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -20,6 +20,7 @@ struct blk_integrity_iter { unsigned int data_size; unsigned short interval; unsigned char tuple_size; + unsigned char pi_offset; const char *disk_name; }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7a8150a5f051..89ba6b16fe8b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,6 +8,7 @@ #include <linux/scatterlist.h> #include <linux/prefetch.h> #include <linux/srcu.h> +#include <linux/rw_hint.h> struct blk_mq_tags; struct blk_flush_queue; @@ -53,8 +54,8 @@ typedef __u32 __bitwise req_flags_t; /* Look at ->special_vec for the actual data payload instead of the bio chain. */ #define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) -/* The per-zone write lock is held for this request */ -#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) +/* The request completion needs to be signaled to zone write plugging. 
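Back in the bitmap.h hunk, bitmap_read() and bitmap_write() generalize the old 8-bit helpers to any width up to BITS_PER_LONG, including values that straddle a word boundary. An illustrative sketch; the names and field size here are hypothetical:

#include <linux/bitmap.h>
#include <linux/bug.h>

/* Illustrative 12-bit field packing; pack_demo is an invented name. */
#define MY_FIELD_BITS	12

static void pack_demo(unsigned long *map)	/* map holds >= 96 bits */
{
	unsigned int i;

	/* bitmap_write() clamps the value to MY_FIELD_BITS bits. */
	for (i = 0; i < 8; i++)
		bitmap_write(map, 0xabc, i * MY_FIELD_BITS, MY_FIELD_BITS);

	/* Field 5 spans bits 60..71 and straddles a word boundary. */
	WARN_ON(bitmap_read(map, 5 * MY_FIELD_BITS, MY_FIELD_BITS) != 0xabc);
}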
*/ +#define RQF_ZONE_WRITE_PLUGGING ((__force req_flags_t)(1 << 20)) /* ->timeout has been called, don't expire again */ #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) #define RQF_RESV ((__force req_flags_t)(1 << 23)) @@ -135,6 +136,7 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif + enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; @@ -682,17 +684,19 @@ enum { #define BLK_MQ_NO_HCTX_IDX (-1U) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -#define blk_mq_alloc_disk(set, queuedata) \ +#define blk_mq_alloc_disk(set, lim, queuedata) \ ({ \ static struct lock_class_key __key; \ \ - __blk_mq_alloc_disk(set, queuedata, &__key); \ + __blk_mq_alloc_disk(set, lim, queuedata, &__key); \ }) struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass); -struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); +struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, + struct queue_limits *lim, void *queuedata); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_destroy_queue(struct request_queue *); @@ -1146,85 +1150,4 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, } void blk_dump_rq_flags(struct request *, char *); -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int blk_rq_zone_no(struct request *rq) -{ - return disk_zone_no(rq->q->disk, blk_rq_pos(rq)); -} - -static inline unsigned int blk_rq_zone_is_seq(struct request *rq) -{ - return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq)); -} - -/** - * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization. - * @rq: Request to examine. - * - * Note: REQ_OP_ZONE_APPEND requests do not require serialization. 
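The blk_mq_alloc_disk() change above threads a queue_limits structure through allocation, so a driver describes its limits up front instead of calling blk_queue_* setters afterwards; passing a NULL limits pointer appears to fall back to the defaults. A probe-path sketch with placeholder names and values:

#include <linux/blk-mq.h>
#include <linux/err.h>

/* Sketch only; my_drv_add_disk and the limit values are placeholders. */
static int my_drv_add_disk(struct blk_mq_tag_set *set, void *drvdata)
{
	struct queue_limits lim = {
		.logical_block_size	= 4096,
		.max_hw_sectors		= 256,
	};
	struct gendisk *disk;

	disk = blk_mq_alloc_disk(set, &lim, drvdata);
	if (IS_ERR(disk))
		return PTR_ERR(disk);

	/* set capacity, then add the disk as before */
	return 0;
}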
- */ -static inline bool blk_rq_is_seq_zoned_write(struct request *rq) -{ - return op_needs_zoned_write_locking(req_op(rq)) && - blk_rq_zone_is_seq(rq); -} - -bool blk_req_needs_zone_write_lock(struct request *rq); -bool blk_req_zone_write_trylock(struct request *rq); -void __blk_req_zone_write_lock(struct request *rq); -void __blk_req_zone_write_unlock(struct request *rq); - -static inline void blk_req_zone_write_lock(struct request *rq) -{ - if (blk_req_needs_zone_write_lock(rq)) - __blk_req_zone_write_lock(rq); -} - -static inline void blk_req_zone_write_unlock(struct request *rq) -{ - if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED) - __blk_req_zone_write_unlock(rq); -} - -static inline bool blk_req_zone_is_write_locked(struct request *rq) -{ - return rq->q->disk->seq_zones_wlock && - test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock); -} - -static inline bool blk_req_can_dispatch_to_zone(struct request *rq) -{ - if (!blk_req_needs_zone_write_lock(rq)) - return true; - return !blk_req_zone_is_write_locked(rq); -} -#else /* CONFIG_BLK_DEV_ZONED */ -static inline bool blk_rq_is_seq_zoned_write(struct request *rq) -{ - return false; -} - -static inline bool blk_req_needs_zone_write_lock(struct request *rq) -{ - return false; -} - -static inline void blk_req_zone_write_lock(struct request *rq) -{ -} - -static inline void blk_req_zone_write_unlock(struct request *rq) -{ -} -static inline bool blk_req_zone_is_write_locked(struct request *rq) -{ - return false; -} - -static inline bool blk_req_can_dispatch_to_zone(struct request *rq) -{ - return true; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - #endif /* BLK_MQ_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f288c94374b3..781c4500491b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -10,6 +10,7 @@ #include <linux/bvec.h> #include <linux/device.h> #include <linux/ktime.h> +#include <linux/rw_hint.h> struct bio_set; struct bio; @@ -44,12 +45,17 @@ struct block_device { struct request_queue * bd_queue; struct disk_stats __percpu *bd_stats; unsigned long bd_stamp; - bool bd_read_only; /* read-only policy */ - u8 bd_partno; - bool bd_write_holder; - bool bd_has_submit_bio; + atomic_t __bd_flags; // partition number + flags +#define BD_PARTNO 255 // lower 8 bits; assign-once +#define BD_READ_ONLY (1u<<8) // read-only policy +#define BD_WRITE_HOLDER (1u<<9) +#define BD_HAS_SUBMIT_BIO (1u<<10) +#define BD_RO_WARNED (1u<<11) +#ifdef CONFIG_FAIL_MAKE_REQUEST +#define BD_MAKE_IT_FAIL (1u<<12) +#endif dev_t bd_dev; - struct inode *bd_inode; /* will die */ + struct address_space *bd_mapping; /* page cache */ atomic_t bd_openers; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ @@ -64,10 +70,6 @@ struct block_device { struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */ struct partition_meta_info *bd_meta_info; -#ifdef CONFIG_FAIL_MAKE_REQUEST - bool bd_make_it_fail; -#endif - bool bd_ro_warned; int bd_writers; /* * keep this out-of-line as it's both big and not needed in the fast @@ -87,15 +89,9 @@ struct block_device { /* * Block error status values. See block/blk-core:blk_errors for the details. - * Alpha cannot write a byte atomically, so we need to use 32-bit value. 
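The blk_types.h hunk above folds the block_device bool members and bd_partno into the single atomic __bd_flags word; the matching bdev_test_flag()/bdev_set_flag()/bdev_partno() helpers are added in the blkdev.h hunk further down. A hypothetical caller after the conversion:

#include <linux/blkdev.h>

/* Illustrative caller; my_check_writable is an invented name. */
static int my_check_writable(struct block_device *bdev)
{
	/* previously: if (bdev->bd_read_only) ... */
	if (bdev_test_flag(bdev, BD_READ_ONLY))
		return -EPERM;
	return 0;
}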
*/ -#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__) -typedef u32 __bitwise blk_status_t; -typedef u32 blk_short_t; -#else typedef u8 __bitwise blk_status_t; typedef u16 blk_short_t; -#endif #define BLK_STS_OK 0 #define BLK_STS_NOTSUPP ((__force blk_status_t)1) #define BLK_STS_TIMEOUT ((__force blk_status_t)2) @@ -136,25 +132,13 @@ typedef u16 blk_short_t; #define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13) /* - * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone - * related resources are unavailable, but the driver can guarantee the queue - * will be rerun in the future once the resources become available again. - * - * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references - * a zone specific resource and IO to a different zone on the same device could - * still be served. Examples of that are zones that are write-locked, but a read - * to the same zone could be served. - */ -#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14) - -/* * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion * path if the device returns a status indicating that too many zone resources * are currently open. The same command should be successful if resubmitted * after the number of open zones decreases below the device's limits, which is * reported in the request_queue's max_open_zones. */ -#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15) +#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)14) /* * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion @@ -163,20 +147,20 @@ typedef u16 blk_short_t; * after the number of active zones decreases below the device's limits, which * is reported in the request_queue's max_active_zones. */ -#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16) +#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)15) /* * BLK_STS_OFFLINE is returned from the driver when the target device is offline * or is being taken offline. This could help differentiate the case where a * device is intentionally being shut down from a real I/O error. */ -#define BLK_STS_OFFLINE ((__force blk_status_t)17) +#define BLK_STS_OFFLINE ((__force blk_status_t)16) /* * BLK_STS_DURATION_LIMIT is returned from the driver when the target device * aborted the command because it exceeded one of its Command Duration Limits. 
*/ -#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)18) +#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)17) /** * blk_path_error - returns true if error may be path related @@ -206,52 +190,10 @@ static inline bool blk_path_error(blk_status_t error) return true; } -/* - * From most significant bit: - * 1 bit: reserved for other usage, see below - * 12 bits: original size of bio - * 51 bits: issue time of bio - */ -#define BIO_ISSUE_RES_BITS 1 -#define BIO_ISSUE_SIZE_BITS 12 -#define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) -#define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) -#define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) -#define BIO_ISSUE_SIZE_MASK \ - (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) -#define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) - -/* Reserved bit for blk-throtl */ -#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) - struct bio_issue { u64 value; }; -static inline u64 __bio_issue_time(u64 time) -{ - return time & BIO_ISSUE_TIME_MASK; -} - -static inline u64 bio_issue_time(struct bio_issue *issue) -{ - return __bio_issue_time(issue->value); -} - -static inline sector_t bio_issue_size(struct bio_issue *issue) -{ - return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); -} - -static inline void bio_issue_init(struct bio_issue *issue, - sector_t size) -{ - size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; - issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | - (ktime_get_ns() & BIO_ISSUE_TIME_MASK) | - ((u64)size << BIO_ISSUE_SIZE_SHIFT)); -} - typedef __u32 __bitwise blk_opf_t; typedef unsigned int blk_qc_t; @@ -269,12 +211,18 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; + enum rw_hint bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; struct bvec_iter bi_iter; - blk_qc_t bi_cookie; + union { + /* for polled bios: */ + blk_qc_t bi_cookie; + /* for plugged zoned writes only: */ + unsigned int __bi_nr_segments; + }; bio_end_io_t *bi_end_io; void *bi_private; #ifdef CONFIG_BLK_CGROUP @@ -344,7 +292,8 @@ enum { BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, - BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */ + BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */ + BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */ BIO_FLAG_LAST }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99e4f5e72213..aefdda9f4ec7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include <linux/sbitmap.h> #include <linux/uuid.h> #include <linux/xarray.h> +#include <linux/file.h> struct module; struct request_queue; @@ -42,7 +43,7 @@ struct blk_crypto_profile; extern const struct device_type disk_type; extern const struct device_type part_type; -extern struct class block_class; +extern const struct class block_class; /* * Maximum number of blkcg policies allowed to be registered concurrently. 
@@ -108,6 +109,7 @@ struct blk_integrity { const struct blk_integrity_profile *profile; unsigned char flags; unsigned char tuple_size; + unsigned char pi_offset; unsigned char interval_exp; unsigned char tag_size; }; @@ -126,6 +128,8 @@ typedef unsigned int __bitwise blk_mode_t; #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) /* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */ #define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5)) +/* return partition scanning errors */ +#define BLK_OPEN_STRICT_SCAN ((__force blk_mode_t)(1 << 6)) struct gendisk { /* @@ -175,24 +179,21 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_ZONED /* - * Zoned block device information for request dispatch control. - * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit set) or - * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones - * bits which indicates if a zone is write locked, that is, if a write - * request targeting the zone was dispatched. - * - * Reads of this information must be protected with blk_queue_enter() / - * blk_queue_exit(). Modifying this information is only allowed while - * no requests are being processed. See also blk_mq_freeze_queue() and - * blk_mq_unfreeze_queue(). + * Zoned block device information. Reads of this information must be + * protected with blk_queue_enter() / blk_queue_exit(). Modifying this + * information is only allowed while no requests are being processed. + * See also blk_mq_freeze_queue() and blk_mq_unfreeze_queue(). */ unsigned int nr_zones; - unsigned int max_open_zones; - unsigned int max_active_zones; + unsigned int zone_capacity; unsigned long *conv_zones_bitmap; - unsigned long *seq_zones_wlock; + unsigned int zone_wplugs_hash_bits; + spinlock_t zone_wplugs_lock; + struct mempool_s *zone_wplugs_pool; + struct hlist_head *zone_wplugs_hash; + struct list_head zone_wplugs_err_list; + struct work_struct zone_wplugs_work; + struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ #if IS_ENABLED(CONFIG_CDROM) @@ -211,11 +212,6 @@ struct gendisk { struct blk_independent_access_ranges *ia_ranges; }; -static inline bool disk_live(struct gendisk *disk) -{ - return !inode_unhashed(disk->part0->bd_inode); -} - /** * disk_openers - returns how many openers are there for a disk * @disk: disk to check @@ -231,6 +227,19 @@ static inline unsigned int disk_openers(struct gendisk *disk) return atomic_read(&disk->part0->bd_openers); } +/** + * disk_has_partscan - return %true if partition scanning is enabled on a disk + * @disk: disk to check + * + * Returns %true if partition scanning is enabled for @disk, or %false if + * partition scanning is disabled either permanently or temporarily. + */ +static inline bool disk_has_partscan(struct gendisk *disk) +{ + return !(disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) && + !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state); +} + /* * The gendisk is refcounted by the part0 block_device, and the bd_device * therein is also used for device model presentation in sysfs. 
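disk_has_partscan() above centralizes a check that was previously open-coded, combining the permanent GENHD_FL_NO_PART/GENHD_FL_HIDDEN flags with the temporary GD_SUPPRESS_PART_SCAN bit. A hypothetical caller, assuming bdev_disk_changed() (declared elsewhere in this header) is the rescan entry point:

#include <linux/blkdev.h>

/* Hypothetical helper: rescan only when scanning is permitted. */
static int my_maybe_rescan(struct gendisk *disk)
{
	if (!disk_has_partscan(disk))
		return 0;	/* NO_PART/HIDDEN or GD_SUPPRESS_PART_SCAN */

	return bdev_disk_changed(disk, false);
}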
@@ -292,6 +301,7 @@ struct queue_limits { unsigned int io_opt; unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; + unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; unsigned int max_zone_append_sectors; @@ -307,6 +317,8 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; bool zoned; + unsigned int max_open_zones; + unsigned int max_active_zones; /* * Drivers that set dma_alignment to less than 511 must be prepared to @@ -325,9 +337,8 @@ void disk_set_zoned(struct gendisk *disk); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, - sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); -int blk_revalidate_disk_zones(struct gendisk *disk, - void (*update_driver_data)(struct gendisk *disk)); + sector_t sectors, sector_t nr_sectors); +int blk_revalidate_disk_zones(struct gendisk *disk); /* * Independent access ranges: struct blk_independent_access_range describes @@ -444,8 +455,6 @@ struct request_queue { atomic_t nr_active_requests_shared_tags; - unsigned int required_elevator_features; - struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; @@ -473,6 +482,7 @@ struct request_queue { struct mutex sysfs_lock; struct mutex sysfs_dir_lock; + struct mutex limits_lock; /* * for reusing dead hctx instance in case of updating @@ -627,37 +637,29 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) -{ - if (!blk_queue_is_zoned(disk->queue)) - return false; - if (!disk->conv_zones_bitmap) - return true; - return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap); -} - static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->max_open_zones = max_open_zones; + disk->queue->limits.max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->max_active_zones = max_active_zones; + disk->queue->limits.max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->max_open_zones; + return bdev->bd_disk->queue->limits.max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->max_active_zones; + return bdev->bd_disk->queue->limits.max_active_zones; } +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int bdev_nr_zones(struct block_device *bdev) { @@ -668,10 +670,6 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; } -static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) -{ - return false; -} static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { return 0; @@ -685,6 +683,10 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; } +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} #endif /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_depth(struct request_queue *q) @@ -716,15 +718,35 @@ void invalidate_disk(struct gendisk *disk); void set_disk_ro(struct 
gendisk *disk, bool read_only); void disk_uevent(struct gendisk *disk, enum kobject_action action); +static inline u8 bdev_partno(const struct block_device *bdev) +{ + return atomic_read(&bdev->__bd_flags) & BD_PARTNO; +} + +static inline bool bdev_test_flag(const struct block_device *bdev, unsigned flag) +{ + return atomic_read(&bdev->__bd_flags) & flag; +} + +static inline void bdev_set_flag(struct block_device *bdev, unsigned flag) +{ + atomic_or(flag, &bdev->__bd_flags); +} + +static inline void bdev_clear_flag(struct block_device *bdev, unsigned flag) +{ + atomic_andnot(flag, &bdev->__bd_flags); +} + static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0->bd_read_only || + return bdev_test_flag(disk->part0, BD_READ_ONLY) || test_bit(GD_READ_ONLY, &disk->state); } static inline int bdev_read_only(struct block_device *bdev) { - return bdev->bd_read_only || get_disk_ro(bdev->bd_disk); + return bdev_test_flag(bdev, BD_READ_ONLY) || get_disk_ro(bdev->bd_disk); } bool set_capacity_and_notify(struct gendisk *disk, sector_t size); @@ -763,22 +785,26 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); -struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); +struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, + struct lock_class_key *lkclass); /** * blk_alloc_disk - allocate a gendisk structure + * @lim: queue limits to be used for this disk. * @node_id: numa node to allocate on * * Allocate and pre-initialize a gendisk structure for use with BIO based * drivers. * + * Returns an ERR_PTR on error, else the allocated disk. + * * Context: can sleep */ -#define blk_alloc_disk(node_id) \ +#define blk_alloc_disk(lim, node_id) \ ({ \ static struct lock_class_key __key; \ \ - __blk_alloc_disk(node_id, &__key); \ + __blk_alloc_disk(lim, node_id, &__key); \ }) int __register_blkdev(unsigned int major, const char *name, @@ -845,9 +871,11 @@ static inline unsigned int bio_zone_no(struct bio *bio) return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } -static inline unsigned int bio_zone_is_seq(struct bio *bio) +static inline bool bio_straddles_zones(struct bio *bio) { - return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); + return bio_sectors(bio) && + bio_zone_no(bio) != + disk_zone_no(bio->bi_bdev->bd_disk, bio_end_sector(bio) - 1); } /* @@ -861,18 +889,48 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } +/** + * queue_limits_start_update - start an atomic update of queue limits + * @q: queue to update + * + * This function starts an atomic update of the queue limits. It takes a lock + * to prevent other updates and returns a snapshot of the current limits that + * the caller can modify. The caller must call queue_limits_commit_update() + * to finish the update. + * + * Context: process context. The caller must have frozen the queue or ensured + * that there is no outstanding I/O by other means. 
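A sketch of the start/commit pattern the kernel-doc above describes, with a hypothetical helper updating a single limit; queue_limits_commit_update() and queue_limits_cancel_update() appear just below:

#include <linux/blkdev.h>

/* Illustrative only; my_update_discard_limit is an invented name. */
static int my_update_discard_limit(struct request_queue *q,
				   unsigned int sectors)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* takes q->limits_lock */
	lim.max_hw_discard_sectors = sectors;
	return queue_limits_commit_update(q, &lim);	/* drops the lock */
}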
+ */ +static inline struct queue_limits +queue_limits_start_update(struct request_queue *q) + __acquires(q->limits_lock) +{ + mutex_lock(&q->limits_lock); + return q->limits; +} +int queue_limits_commit_update(struct request_queue *q, + struct queue_limits *lim); +int queue_limits_set(struct request_queue *q, struct queue_limits *lim); + +/** + * queue_limits_cancel_update - cancel an atomic update of queue limits + * @q: queue to update + * + * This function cancels an atomic update of the queue limits started by + * queue_limits_start_update() and should be used when an error occurs after + * starting an update. + */ +static inline void queue_limits_cancel_update(struct request_queue *q) +{ + mutex_unlock(&q->limits_lock); +} + /* * Access functions for manipulating queue properties */ -void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); -extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); -extern void blk_queue_max_segments(struct request_queue *, unsigned short); -extern void blk_queue_max_discard_segments(struct request_queue *, - unsigned short); void blk_queue_max_secure_erase_sectors(struct request_queue *q, unsigned int max_sectors); -extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, @@ -889,18 +947,13 @@ void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); -extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, - sector_t offset); +void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, + sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); -extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); -extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -extern void blk_queue_dma_alignment(struct request_queue *, int); -extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); @@ -909,17 +962,6 @@ disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); void disk_set_independent_access_ranges(struct gendisk *disk, struct blk_independent_access_ranges *iars); -/* - * Elevator features for blk_queue_required_elevator_features: - */ -/* Supports zoned block devices sequential write constraint */ -#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) - -extern void blk_queue_required_elevator_features(struct request_queue *q, - unsigned int features); -extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q, - struct device *dev); - bool __must_check blk_get_queue(struct request_queue *); extern void 
blk_put_queue(struct request_queue *); @@ -942,6 +984,7 @@ struct blk_plug { /* if ios_left is > 1, we can batch tag/rq allocations */ struct request *cached_rq; + u64 cur_ktime; unsigned short nr_ios; unsigned short rq_count; @@ -972,6 +1015,18 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) __blk_flush_plug(plug, async); } +/* + * tsk == current here + */ +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (plug) + plug->cur_ktime = 0; + current->flags &= ~PF_BLOCK_TS; +} + int blkdev_issue_flush(struct block_device *bdev); long nr_blockdev_pages(void); #else /* CONFIG_BLOCK */ @@ -995,6 +1050,10 @@ static inline void blk_flush_plug(struct blk_plug *plug, bool async) { } +static inline void blk_plug_invalidate_ts(struct task_struct *tsk) +{ +} + static inline int blkdev_issue_flush(struct block_device *bdev) { return 0; @@ -1047,7 +1106,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, static inline bool bdev_is_partition(struct block_device *bdev) { - return bdev->bd_partno; + return bdev_partno(bdev) != 0; } enum blk_default_limits { @@ -1106,12 +1165,29 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q) +static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l) { + unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); - const struct queue_limits *l = &q->limits; + return min_not_zero(l->max_zone_append_sectors, max_sectors); +} - return min(l->max_zone_append_sectors, l->max_sectors); +static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q) +{ + if (!blk_queue_is_zoned(q)) + return 0; + + return queue_limits_max_zone_append_sectors(&q->limits); +} + +static inline bool queue_emulates_zone_append(struct request_queue *q) +{ + return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors; +} + +static inline bool bdev_emulates_zone_append(struct block_device *bdev) +{ + return queue_emulates_zone_append(bdev_get_queue(bdev)); } static inline unsigned int @@ -1253,18 +1329,6 @@ static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec) return disk_zone_no(bdev->bd_disk, sec); } -/* Whether write serialization is required for @op on zoned devices. 
*/ -static inline bool op_needs_zoned_write_locking(enum req_op op) -{ - return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES; -} - -static inline bool bdev_op_is_zoned_write(struct block_device *bdev, - enum req_op op) -{ - return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op); -} - static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -1280,6 +1344,12 @@ static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev, return sector & (bdev_zone_sectors(bdev) - 1); } +static inline sector_t bio_offset_from_zone_start(struct bio *bio) +{ + return bdev_offset_from_zone_start(bio->bi_bdev, + bio->bi_iter.bi_sector); +} + static inline bool bdev_is_zone_start(struct block_device *bdev, sector_t sector) { @@ -1316,11 +1386,6 @@ static inline unsigned int blksize_bits(unsigned int size) return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT; } -static inline unsigned int block_size(struct block_device *bdev) -{ - return 1 << bdev->bd_inode->i_blkbits; -} - int kblockd_schedule_work(struct work_struct *work); int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); @@ -1426,7 +1491,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) } int bdev_read_only(struct block_device *bdev); -int set_blocksize(struct block_device *bdev, int size); +int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1474,26 +1539,22 @@ extern const struct blk_holder_ops fs_holder_ops; (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) -struct bdev_handle { - struct block_device *bdev; - void *holder; - blk_mode_t mode; -}; - -struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, +struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); -struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, +struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops); int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -void bdev_release(struct bdev_handle *handle); /* just for blk-cgroup, don't use elsewhere */ struct block_device *blkdev_get_no_open(dev_t dev); void blkdev_put_no_open(struct block_device *bdev); struct block_device *I_BDEV(struct inode *inode); +struct block_device *file_bdev(struct file *bdev_file); +bool disk_live(struct gendisk *disk); +unsigned int block_size(struct block_device *bdev); #ifdef CONFIG_BLOCK void invalidate_bdev(struct block_device *bdev); @@ -1533,6 +1594,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev) int bdev_freeze(struct block_device *bdev); int bdev_thaw(struct block_device *bdev); +void bdev_fput(struct file *bdev_file); struct io_comp_batch { struct request *req_list; diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index ca73940e26df..3f4b4ac527ca 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -10,6 +10,7 @@ #ifdef __KERNEL__ #include <linux/kernel.h> #include <linux/types.h> +bool __init cmdline_has_extra_options(void); #else /* !__KERNEL__ */ /* * NOTE: This is only for tools/bootconfig, because tools/bootconfig will @@ -287,7 +288,12 @@ int __init xbc_init(const char *buf, size_t size, const char 
**emsg, int *epos); int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_exit(void); +void __init _xbc_exit(bool early); + +static inline void xbc_exit(void) +{ + _xbc_exit(false); +} /* XBC embedded bootconfig data in kernel */ #ifdef CONFIG_BOOT_CONFIG_EMBED diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a789266feac3..fb3c3e7181e6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -196,7 +196,8 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && \ - cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) \ + cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS) && sk && \ + sk_fullsock(sk)) \ __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ CGROUP_INET_INGRESS); \ \ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e30100597d0a..5e694a308081 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -37,6 +37,7 @@ struct perf_event; struct bpf_prog; struct bpf_prog_aux; struct bpf_map; +struct bpf_arena; struct sock; struct seq_file; struct btf; @@ -52,6 +53,10 @@ struct module; struct bpf_func_state; struct ftrace_ops; struct cgroup; +struct bpf_token; +struct user_namespace; +struct super_block; +struct inode; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -135,6 +140,9 @@ struct bpf_map_ops { int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma); __poll_t (*map_poll)(struct bpf_map *map, struct file *filp, struct poll_table_struct *pts); + unsigned long (*map_get_unmapped_area)(struct file *filep, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); /* Functions called by bpf_local_storage maps */ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap, @@ -176,8 +184,8 @@ struct bpf_map_ops { }; enum { - /* Support at most 10 fields in a BTF type */ - BTF_FIELDS_MAX = 10, + /* Support at most 11 fields in a BTF type */ + BTF_FIELDS_MAX = 11, }; enum btf_field_type { @@ -194,6 +202,7 @@ enum btf_field_type { BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), + BPF_WORKQUEUE = (1 << 10), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -230,6 +239,7 @@ struct btf_record { u32 field_mask; int spin_lock_off; int timer_off; + int wq_off; int refcount_off; struct btf_field fields[]; }; @@ -247,10 +257,7 @@ struct bpf_list_node_kern { } __attribute__((aligned(8))); struct bpf_map { - /* The first two cachelines with read-mostly members of which some - * are also accessed in fast-path (e.g. ops, max_entries). - */ - const struct bpf_map_ops *ops ____cacheline_aligned; + const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY void *security; @@ -272,17 +279,14 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - /* The 3rd and 4th cacheline with misc members to avoid false sharing - * particularly with refcounting. 
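BPF_WORKQUEUE above joins BPF_SPIN_LOCK and BPF_TIMER as a BTF-managed special field, which is why BTF_FIELDS_MAX grows to 11 and btf_record gains wq_off. On the program side this surfaces as a struct bpf_wq embedded in a map value, analogous to the existing bpf_timer usage; an illustrative selftest-style fragment, with hypothetical names:

/* BPF program side (illustrative only). */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct my_elem {
	struct bpf_wq wq;	/* offset recorded in btf_record::wq_off */
	int data;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 16);
	__type(key, int);
	__type(value, struct my_elem);
} my_wq_map SEC(".maps");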
- */ - atomic64_t refcnt ____cacheline_aligned; + struct mutex freeze_mutex; + atomic64_t refcnt; atomic64_t usercnt; /* rcu is used before freeing and work is only used during freeing */ union { struct work_struct work; struct rcu_head rcu; }; - struct mutex freeze_mutex; atomic64_t writecnt; /* 'Ownership' of program-containing map is claimed by the first program * that is going to use this map or by the first program which FD is @@ -310,6 +314,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_spin_lock"; case BPF_TIMER: return "bpf_timer"; + case BPF_WORKQUEUE: + return "bpf_wq"; case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; @@ -338,6 +344,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); + case BPF_WORKQUEUE: + return sizeof(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -365,6 +373,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); + case BPF_WORKQUEUE: + return __alignof__(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -404,6 +414,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -523,12 +534,13 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_wq_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, struct bpf_spin_lock *spin_lock); - - +u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); +u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -710,6 +722,7 @@ enum bpf_arg_type { * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_ARENA, ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -881,6 +894,7 @@ enum bpf_reg_type { * an explicit null check is required for this struct. 
*/ PTR_TO_MEM, /* reg points to valid memory region */ + PTR_TO_ARENA, PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ @@ -1112,8 +1126,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *func_addr); void *arch_alloc_bpf_trampoline(unsigned int size); void arch_free_bpf_trampoline(void *image, unsigned int size); -void arch_protect_bpf_trampoline(void *image, unsigned int size); -void arch_unprotect_bpf_trampoline(void *image, unsigned int size); +int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size); int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr); @@ -1185,7 +1198,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - struct module *mod; }; struct bpf_attach_target_info { @@ -1263,6 +1275,7 @@ int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); @@ -1412,6 +1425,7 @@ struct bpf_jit_poke_descriptor { struct bpf_ctx_arg_aux { u32 offset; enum bpf_reg_type reg_type; + struct btf *btf; u32 btf_id; }; @@ -1451,11 +1465,11 @@ struct bpf_prog_aux { bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool attach_tracing_prog; /* true if tracing another tracing program */ bool func_proto_unreliable; - bool sleepable; bool tail_call_reachable; bool xdp_has_frags; bool exception_cb; bool exception_boundary; + struct bpf_arena *arena; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1485,6 +1499,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_token *token; struct bpf_prog_offload *offload; struct btf *btf; struct bpf_func_info *func_info; @@ -1535,7 +1550,8 @@ struct bpf_prog { enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */ call_get_func_ip:1, /* Do we call get_func_ip() */ - tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */ + tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */ + sleepable:1; /* BPF program is sleepable */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ @@ -1568,12 +1584,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through 
tasks trace
+	 * RCU GP and then "classic" RCU GP
+	 */
+	void (*dealloc_deferred)(struct bpf_link *link);
	int (*detach)(struct bpf_link *link);
	int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
			   struct bpf_prog *old_prog);
@@ -1602,6 +1632,12 @@ struct bpf_tracing_link {
	struct bpf_prog *tgt_prog;
};
+struct bpf_raw_tp_link {
+	struct bpf_link link;
+	struct bpf_raw_event_map *btp;
+	u64 cookie;
+};
+
struct bpf_link_primer {
	struct bpf_link *link;
	struct file *file;
@@ -1609,6 +1645,31 @@ struct bpf_link_primer {
	u32 id;
};
+struct bpf_mount_opts {
+	kuid_t uid;
+	kgid_t gid;
+	umode_t mode;
+
+	/* BPF token-related delegation options */
+	u64 delegate_cmds;
+	u64 delegate_maps;
+	u64 delegate_progs;
+	u64 delegate_attachs;
+};
+
+struct bpf_token {
+	struct work_struct work;
+	atomic64_t refcnt;
+	struct user_namespace *userns;
+	u64 allowed_cmds;
+	u64 allowed_maps;
+	u64 allowed_progs;
+	u64 allowed_attachs;
+#ifdef CONFIG_SECURITY
+	void *security;
+#endif
+};
+
struct bpf_struct_ops_value;
struct btf_member;
@@ -1673,19 +1734,64 @@ struct bpf_struct_ops {
	void (*unreg)(void *kdata);
	int (*update)(void *kdata, void *old_kdata);
	int (*validate)(void *kdata);
-	const struct btf_type *type;
-	const struct btf_type *value_type;
+	void *cfi_stubs;
+	struct module *owner;
	const char *name;
	struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
+};
+
+/* Every member of a struct_ops type has an instance, even if the member is
+ * not an operator (function pointer). The "info" field will be assigned to
+ * prog->aux->ctx_arg_info of BPF struct_ops programs to provide the
+ * argument information required by the verifier to verify the program.
+ *
+ * btf_ctx_access() will look up prog->aux->ctx_arg_info to find the
+ * corresponding entry for a given argument.
+ */
+struct bpf_struct_ops_arg_info {
+	struct bpf_ctx_arg_aux *info;
+	u32 cnt;
+};
+
+struct bpf_struct_ops_desc {
+	struct bpf_struct_ops *st_ops;
+
+	const struct btf_type *type;
+	const struct btf_type *value_type;
	u32 type_id;
	u32 value_id;
-	void *cfi_stubs;
+
+	/* Collection of argument information for each member */
+	struct bpf_struct_ops_arg_info *arg_info;
+};
+
+enum bpf_struct_ops_state {
+	BPF_STRUCT_OPS_STATE_INIT,
+	BPF_STRUCT_OPS_STATE_INUSE,
+	BPF_STRUCT_OPS_STATE_TOBEFREE,
+	BPF_STRUCT_OPS_STATE_READY,
+};
+
+struct bpf_struct_ops_common_value {
+	refcount_t refcnt;
+	enum bpf_struct_ops_state state;
};
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+/* This macro helps developers register a struct_ops type and generate
+ * its type information correctly. Developers should use this macro to
+ * register a struct_ops type instead of calling __register_bpf_struct_ops() directly.
+ */ +#define register_bpf_struct_ops(st_ops, type) \ + ({ \ + struct bpf_struct_ops_##type { \ + struct bpf_struct_ops_common_value common; \ + struct type data ____cacheline_aligned_in_smp; \ + }; \ + BTF_TYPE_EMIT(struct bpf_struct_ops_##type); \ + __register_bpf_struct_ops(st_ops); \ + }) #define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) + POISON_POINTER_DELTA)) -const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id); -void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log); bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, @@ -1694,7 +1800,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, struct bpf_tramp_link *link, const struct btf_func_model *model, void *stub_func, - void *image, void *image_end); + void **image, u32 *image_off, + bool allow_alloc); +void bpf_struct_ops_image_free(void *image); static inline bool bpf_try_module_get(const void *data, struct module *owner) { if (owner == BPF_MODULE_OWNER) @@ -1727,15 +1835,13 @@ struct bpf_dummy_ops { int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); #endif +int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc, + struct btf *btf, + struct bpf_verifier_log *log); +void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map); +void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc); #else -static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id) -{ - return NULL; -} -static inline void bpf_struct_ops_init(struct btf *btf, - struct bpf_verifier_log *log) -{ -} +#define register_bpf_struct_ops(st_ops, type) ({ (void *)(st_ops); 0; }) static inline bool bpf_try_module_get(const void *data, struct module *owner) { return try_module_get(owner); @@ -1754,6 +1860,13 @@ static inline int bpf_struct_ops_link_create(union bpf_attr *attr) { return -EOPNOTSUPP; } +static inline void bpf_map_struct_ops_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ +} + +static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_ops_desc) +{ +} #endif @@ -2029,14 +2142,14 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; while ((prog = READ_ONCE(item->prog))) { - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_lock(); run_ctx.bpf_cookie = item->bpf_cookie; ret &= run_prog(prog, ctx); item++; - if (!prog->aux->sleepable) + if (!prog->sleepable) rcu_read_unlock(); } bpf_reset_run_ctx(old_run_ctx); @@ -2068,6 +2181,7 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +extern const struct super_operations bpf_super_ops; extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -2106,6 +2220,7 @@ void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); @@ -2135,6 +2250,8 @@ int 
generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, + unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG_KMEM void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); @@ -2144,31 +2261,14 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else -static inline void * -bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, - int node) -{ - return kmalloc_node(size, flags, node); -} - -static inline void * -bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) -{ - return kzalloc(size, flags); -} - -static inline void * -bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags) -{ - return kvcalloc(n, size, flags); -} - -static inline void __percpu * -bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, - gfp_t flags) -{ - return __alloc_percpu_gfp(size, align, flags); -} +#define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ + kmalloc_node(_size, _flags, _node) +#define bpf_map_kzalloc(_map, _size, _flags) \ + kzalloc(_size, _flags) +#define bpf_map_kvcalloc(_map, _n, _size, _flags) \ + kvcalloc(_n, _size, _flags) +#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ + __alloc_percpu_gfp(_size, _align, _flags) #endif static inline int @@ -2202,24 +2302,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map) extern int sysctl_unprivileged_bpf_disabled; -static inline bool bpf_allow_ptr_leaks(void) +bool bpf_token_capable(const struct bpf_token *token, int cap); + +static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_allow_uninit_stack(void) +static inline bool bpf_allow_uninit_stack(const struct bpf_token *token) { - return perfmon_capable(); + return bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v1(void) +static inline bool bpf_bypass_spec_v1(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } -static inline bool bpf_bypass_spec_v4(void) +static inline bool bpf_bypass_spec_v4(const struct bpf_token *token) { - return cpu_mitigations_off() || perfmon_capable(); + return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON); } int bpf_map_new_fd(struct bpf_map *map, int flags); @@ -2236,8 +2338,21 @@ int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); +void bpf_token_inc(struct bpf_token *token); +void bpf_token_put(struct bpf_token *token); +int bpf_token_create(union bpf_attr *attr); +struct bpf_token *bpf_token_get_from_fd(u32 ufd); + +bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type); +bool bpf_token_allow_prog_type(const struct bpf_token *token, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type); + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); +struct inode 
*bpf_get_inode(struct super_block *sb, const struct inode *dir, + umode_t mode); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ @@ -2472,11 +2587,14 @@ int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *pr struct btf *btf, const struct btf_type *t); const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, int comp_idx, const char *tag_key); +int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key, int last_id); struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); -const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog); void bpf_task_storage_free(struct task_struct *task); void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); @@ -2595,6 +2713,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags) return -EOPNOTSUPP; } +static inline bool bpf_token_capable(const struct bpf_token *token, int cap) +{ + return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN)); +} + +static inline void bpf_token_inc(struct bpf_token *token) +{ +} + +static inline void bpf_token_put(struct bpf_token *token) +{ +} + +static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline void __dev_flush(void) { } @@ -2718,7 +2854,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log, } static inline const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) +bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { return NULL; } @@ -2869,6 +3005,7 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog); void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); @@ -2967,6 +3104,11 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ diff --git a/include/linux/bpf_crypto.h b/include/linux/bpf_crypto.h new file mode 100644 index 000000000000..a41e71d4e2d9 --- /dev/null +++ b/include/linux/bpf_crypto.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ +#ifndef _BPF_CRYPTO_H +#define _BPF_CRYPTO_H + +struct bpf_crypto_type { + void *(*alloc_tfm)(const char *algo); + void (*free_tfm)(void *tfm); + int (*has_algo)(const char *algo); + int (*setkey)(void *tfm, const u8 *key, unsigned int keylen); + int (*setauthsize)(void *tfm, unsigned int authsize); + int (*encrypt)(void *tfm, const u8 *src, u8 *dst, unsigned int len, u8 *iv); + int (*decrypt)(void *tfm, const u8 *src, u8 *dst, unsigned int len, u8 *iv); + unsigned int (*ivsize)(void *tfm); + unsigned int (*statesize)(void *tfm); + u32 (*get_flags)(void *tfm); + struct module *owner; + char name[14]; +}; + +int bpf_crypto_register_type(const struct bpf_crypto_type *type); +int bpf_crypto_unregister_type(const struct bpf_crypto_type *type); + +#endif /* _BPF_CRYPTO_H */ diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 173ec7f43ed1..dcddb0aef7d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -129,10 +129,36 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, struct bpf_local_storage_cache *cache, bool bpf_ma); -struct bpf_local_storage_data * +void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, + struct bpf_local_storage_map *smap, + struct bpf_local_storage_elem *selem); +/* If cacheit_lockit is false, this lookup function is lockless */ +static inline struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, - bool cacheit_lockit); + bool cacheit_lockit) +{ + struct bpf_local_storage_data *sdata; + struct bpf_local_storage_elem *selem; + + /* Fast path (cache hit) */ + sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], + bpf_rcu_lock_held()); + if (sdata && rcu_access_pointer(sdata->smap) == smap) + return sdata; + + /* Slow path (cache miss) */ + hlist_for_each_entry_rcu(selem, &local_storage->list, snode, + rcu_read_lock_trace_held()) + if (rcu_access_pointer(SDATA(selem)->smap) == smap) + break; + + if (!selem) + return NULL; + if (cacheit_lockit) + __bpf_local_storage_insert_cache(local_storage, smap, selem); + return SDATA(selem); +} void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 94baced5a1ad..9f2a6b83b49e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops) BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index d07d857ca67f..50aa87f8d77f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -421,11 +421,13 @@ struct bpf_verifier_state { struct bpf_active_lock active_lock; bool speculative; bool active_rcu_lock; + u32 active_preempt_lock; /* If this state was ever pointed-to by other state's loop_entry field * this flag would be set to true. Used to avoid freeing such states * while they are still in use. 
*/ bool used_as_loop_entry; + bool in_sleepable; /* first and last insn idx of this verifier state */ u32 first_insn_idx; @@ -449,11 +451,12 @@ struct bpf_verifier_state { u32 jmp_history_cnt; u32 dfs_depth; u32 callback_unroll_depth; + u32 may_goto_depth; }; #define bpf_get_spilled_reg(slot, frame, mask) \ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \ - ((1 << frame->stack[slot].slot_type[0]) & (mask))) \ + ((1 << frame->stack[slot].slot_type[BPF_REG_SIZE - 1]) & (mask))) \ ? &frame->stack[slot].spilled_ptr : NULL) /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ @@ -501,6 +504,13 @@ struct bpf_loop_inline_state { u32 callback_subprogno; /* valid when fit_for_inline is true */ }; +/* pointer and state for maps */ +struct bpf_map_ptr_state { + struct bpf_map *map_ptr; + bool poison; + bool unpriv; +}; + /* Possible states for alu_state member. */ #define BPF_ALU_SANITIZE_SRC (1U << 0) #define BPF_ALU_SANITIZE_DST (1U << 1) @@ -513,7 +523,7 @@ struct bpf_loop_inline_state { struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ - unsigned long map_ptr_state; /* pointer/poison value for maps */ + struct bpf_map_ptr_state map_ptr_state; s32 call_imm; /* saved imm field of call insn */ u32 alu_limit; /* limit for add/sub register with pointer */ struct { @@ -547,6 +557,7 @@ struct bpf_insn_aux_data { u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool needs_zext; /* alu op needs to clear upper bits */ bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */ bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */ @@ -610,6 +621,7 @@ struct bpf_subprog_arg_info { enum bpf_arg_type arg_type; union { u32 mem_size; + u32 btf_id; }; }; @@ -618,6 +630,7 @@ struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u32 linfo_idx; /* The idx to the main_prog->aux->linfo */ u16 stack_depth; /* max. 
stack depth used by this function */ + u16 stack_extra; bool has_tail_call: 1; bool tail_call_reachable: 1; bool has_ld_abs: 1; @@ -662,6 +675,7 @@ struct bpf_verifier_env { u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ const struct bpf_verifier_ops *ops; + struct module *attach_btf_mod; /* The owner module of prog->aux->attach_btf */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ @@ -917,6 +931,15 @@ static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size) +{ +#ifdef __BIG_ENDIAN + off -= spill_size - fill_size; +#endif + + return !(off % BPF_REG_SIZE); +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 79b2f78eec1a..1af241525a17 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -65,9 +65,9 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size); } -static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) +static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { - void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN); + void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); @@ -77,6 +77,7 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) } return p; } +#define kvmemdup_bpfptr(...) 
alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__)) static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) { diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 9e97ced2896d..14fa93268630 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -65,7 +65,8 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size); + struct queue_limits *lim, bsg_job_fn *job_fn, + bsg_timeout_fn *timeout, int dd_job_size); void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); diff --git a/include/linux/btf.h b/include/linux/btf.h index cf5c6ff48981..f9e56fd12a9f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -137,6 +137,7 @@ struct btf_struct_metas { extern const struct file_operations btf_fops; +const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); @@ -494,8 +495,26 @@ static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); } +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix); +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no); + struct bpf_verifier_log; +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) +struct bpf_struct_ops; +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops); +const struct bpf_struct_ops_desc *bpf_struct_ops_find_value(struct btf *btf, u32 value_id); +const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id); +#else +static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + return NULL; +} +#endif + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); @@ -512,10 +531,9 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); -const struct btf_type * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg); +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); bool btf_types_are_same(const struct btf *btf1, u32 id1, const struct btf *btf2, u32 id2); @@ -555,12 +573,12 @@ static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf { return NULL; } -static inline const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static inline bool +btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) { - return NULL; + return false; } static inline int 
get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) { diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index a9cb10b0e2e9..c0e3e1426a82 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -3,11 +3,16 @@ #ifndef _LINUX_BTF_IDS_H #define _LINUX_BTF_IDS_H +#include <linux/types.h> /* for u32 */ + struct btf_id_set { u32 cnt; u32 ids[]; }; +/* This flag implies BTF_SET8 holds kfunc(s) */ +#define BTF_SET8_KFUNCS (1 << 0) + struct btf_id_set8 { u32 cnt; u32 flags; @@ -21,6 +26,7 @@ struct btf_id_set8 { #include <linux/compiler.h> /* for __PASTE */ #include <linux/compiler_attributes.h> /* for __maybe_unused */ +#include <linux/stringify.h> /* * Following macros help to define lists of BTF IDs placed @@ -183,17 +189,18 @@ extern struct btf_id_set name; * .word (1 << 3) | (1 << 1) | (1 << 2) * */ -#define __BTF_SET8_START(name, scope) \ +#define __BTF_SET8_START(name, scope, flags) \ +__BTF_ID_LIST(name, local) \ asm( \ ".pushsection " BTF_IDS_SECTION ",\"a\"; \n" \ "." #scope " __BTF_ID__set8__" #name "; \n" \ "__BTF_ID__set8__" #name ":; \n" \ -".zero 8 \n" \ +".zero 4 \n" \ +".long " __stringify(flags) "\n" \ ".popsection; \n"); #define BTF_SET8_START(name) \ -__BTF_ID_LIST(name, local) \ -__BTF_SET8_START(name, local) +__BTF_SET8_START(name, local, 0) #define BTF_SET8_END(name) \ asm( \ @@ -202,6 +209,12 @@ asm( \ ".popsection; \n"); \ extern struct btf_id_set8 name; +#define BTF_KFUNCS_START(name) \ +__BTF_SET8_START(name, local, BTF_SET8_KFUNCS) + +#define BTF_KFUNCS_END(name) \ +BTF_SET8_END(name) + #else #define BTF_ID_LIST(name) static u32 __maybe_unused name[64]; @@ -216,6 +229,8 @@ extern struct btf_id_set8 name; #define BTF_SET_END(name) #define BTF_SET8_START(name) static struct btf_id_set8 __maybe_unused name = { 0 }; #define BTF_SET8_END(name) +#define BTF_KFUNCS_START(name) static struct btf_id_set8 __maybe_unused name = { .flags = BTF_SET8_KFUNCS }; +#define BTF_KFUNCS_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d78454a4dd1f..e022e40b099e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -303,12 +303,38 @@ static inline void put_bh(struct buffer_head *bh) atomic_dec(&bh->b_count); } +/** + * brelse - Release a buffer. + * @bh: The buffer to release. + * + * Decrement a buffer_head's reference count. If @bh is NULL, this + * function is a no-op. + * + * If all buffers on a folio have zero reference count, are clean + * and unlocked, and if the folio is unlocked and not under writeback + * then try_to_free_buffers() may strip the buffers from the folio in + * preparation for freeing it (sometimes, rarely, buffers are removed + * from a folio but it ends up not being freed, and buffers may later + * be reattached). + * + * Context: Any context. + */ static inline void brelse(struct buffer_head *bh) { if (bh) __brelse(bh); } +/** + * bforget - Discard any dirty data in a buffer. + * @bh: The buffer to forget. + * + * Call this function instead of brelse() if the data written to a buffer + * no longer needs to be written back. It will clear the buffer's dirty + * flag so writeback of this buffer will be skipped. + * + * Context: Any context. 
+ */ static inline void bforget(struct buffer_head *bh) { if (bh) @@ -338,7 +364,7 @@ static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, { gfp_t gfp; - gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp = mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); gfp |= __GFP_NOFAIL; return bdev_getblk(bdev, block, size, gfp); @@ -349,7 +375,7 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, { gfp_t gfp; - gfp = mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp = mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); gfp |= __GFP_MOVABLE | __GFP_NOFAIL; return bdev_getblk(bdev, block, size, gfp); @@ -437,17 +463,21 @@ static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[], } /** - * __bread() - reads a specified block and returns the bh - * @bdev: the block_device to read from - * @block: number of block - * @size: size (in bytes) to read + * __bread() - Read a block. + * @bdev: The block device to read from. + * @block: Block number in units of block size. + * @size: The block size of this device in bytes. * - * Reads a specified block, and returns buffer head that contains it. - * The page cache is allocated from movable area so that it can be migrated. - * It returns NULL if the block was unreadable. + * Read a specified block, and return the buffer head that refers + * to it. The memory is allocated from the movable area so that it can + * be migrated. The returned buffer head has its refcount increased. + * The caller should call brelse() when it has finished with the buffer. + * + * Context: May sleep waiting for I/O. + * Return: NULL if the block was unreadable. */ -static inline struct buffer_head * -__bread(struct block_device *bdev, sector_t block, unsigned size) +static inline struct buffer_head *__bread(struct block_device *bdev, + sector_t block, unsigned size) { return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 8a582d242f06..20aa3c2d89f7 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -11,7 +11,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, __u32 *size); int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size); -#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE) +#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO) extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX]; void init_vmlinux_build_id(void); #else diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h new file mode 100644 index 000000000000..18e0a2fc3816 --- /dev/null +++ b/include/linux/bus/stm32_firewall_device.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023, STMicroelectronics - All Rights Reserved + */ + +#ifndef STM32_FIREWALL_DEVICE_H +#define STM32_FIREWALL_DEVICE_H + +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/types.h> + +#define STM32_FIREWALL_MAX_EXTRA_ARGS 5 + +/* Opaque reference to stm32_firewall_controller */ +struct stm32_firewall_controller; + +/** + * struct stm32_firewall - Information on a device's firewall. Each device can have more than one + * firewall. + * + * @firewall_ctrl: Pointer referencing a firewall controller of the device. 
It is
+ *			opaque so a device cannot manipulate the controller's ops or access
+ *			the controller's data
+ * @extra_args:		Extra arguments that are implementation dependent
+ * @entry:		Name of the firewall entry
+ * @extra_args_size:	Number of extra arguments
+ * @firewall_id:	Firewall ID associated with the device for this firewall controller
+ */
+struct stm32_firewall {
+	struct stm32_firewall_controller *firewall_ctrl;
+	u32 extra_args[STM32_FIREWALL_MAX_EXTRA_ARGS];
+	const char *entry;
+	size_t extra_args_size;
+	u32 firewall_id;
+};
+
+#if IS_ENABLED(CONFIG_STM32_FIREWALL)
+/**
+ * stm32_firewall_get_firewall - Get the firewall(s) associated with a given device.
+ *				 The firewall controller reference is always the first argument
+ *				 of each of the access-controller property entries.
+ *				 The firewall ID is always the second argument of each of the
+ *				 access-controller property entries.
+ *				 If there's no argument linked to the phandle, then the firewall ID
+ *				 field is set to U32_MAX, which is an invalid ID.
+ *
+ * @np:			Device node to parse
+ * @firewall:		Array of firewall references
+ * @nb_firewall:	Number of firewall references to get. Must be at least 1.
+ *
+ * Returns 0 on success, -ENODEV if there is no match with a firewall controller, or an
+ * appropriate errno code if an error occurred.
+ */
+int stm32_firewall_get_firewall(struct device_node *np, struct stm32_firewall *firewall,
+				unsigned int nb_firewall);
+
+/**
+ * stm32_firewall_grant_access - Request firewall access rights and grant access.
+ *
+ * @firewall:		Firewall reference containing the ID to check against its firewall
+ *			controller
+ *
+ * Returns 0 if access is granted, -EACCES if access is denied, -ENODEV if @firewall is NULL,
+ * or an appropriate errno code if an error occurred
+ */
+int stm32_firewall_grant_access(struct stm32_firewall *firewall);
+
+/**
+ * stm32_firewall_release_access - Release access granted from a call to
+ *				   stm32_firewall_grant_access().
+ *
+ * @firewall:		Firewall reference containing the ID to check against its firewall
+ *			controller
+ */
+void stm32_firewall_release_access(struct stm32_firewall *firewall);
+
+/**
+ * stm32_firewall_grant_access_by_id - Request firewall access rights of a given device
+ *				       based on a specific firewall ID
+ *
+ * Warnings:
+ * There is no way to ensure that the given ID will correspond to the firewall referenced in the
+ * device node if the ID did not come from stm32_firewall_get_firewall(). In that case, this
+ * function must be used with caution.
+ * This function should be used for subsystem resources that do not have the same firewall ID
+ * as their parent.
+ * U32_MAX is an invalid ID.
+ *
+ * @firewall:		Firewall reference containing the firewall controller
+ * @subsystem_id:	Firewall ID of the subsystem resource
+ *
+ * Returns 0 if access is granted, -EACCES if access is denied, -ENODEV if @firewall is NULL,
+ * or an appropriate errno code if an error occurred
+ */
+int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id);
+
+/**
+ * stm32_firewall_release_access_by_id - Release access granted from a call to
+ *					 stm32_firewall_grant_access_by_id().
+ *
+ * Warnings:
+ * There is no way to ensure that the given ID will correspond to the firewall referenced in the
+ * device node if the ID did not come from stm32_firewall_get_firewall(). In that case, this
+ * function must be used with caution.
+ * This function should be used for subsystem resources that do not have the same firewall ID
+ * as their parent.
+ * U32_MAX is an invalid ID.
+ *
+ * @firewall:		Firewall reference containing the firewall controller
+ * @subsystem_id:	Firewall ID of the subsystem resource
+ */
+void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id);
+
+#else /* CONFIG_STM32_FIREWALL */
+
+static inline int stm32_firewall_get_firewall(struct device_node *np,
+					      struct stm32_firewall *firewall,
+					      unsigned int nb_firewall)
+{
+	return -ENODEV;
+}
+
+static inline int stm32_firewall_grant_access(struct stm32_firewall *firewall)
+{
+	return -ENODEV;
+}
+
+static inline void stm32_firewall_release_access(struct stm32_firewall *firewall)
+{
+}
+
+static inline int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id)
+{
+	return -ENODEV;
+}
+
+static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id)
+{
+}
+
+#endif /* CONFIG_STM32_FIREWALL */
+#endif /* STM32_FIREWALL_DEVICE_H */
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index d504eb4b49ab..2cb15fe4fe12 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -138,4 +138,10 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level)
#define use_arch_cache_info() (false)
#endif
+#ifndef CONFIG_ARCH_HAS_CPU_CACHE_ALIASING
+#define cpu_dcache_is_aliasing()	false
+#else
+#include <asm/cachetype.h>
+#endif
+
#endif /* _LINUX_CACHEINFO_H */
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index cb0d6cd1c12f..60693a145894 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -90,6 +90,14 @@ enum cc_attr {
	 * Examples include TDX Guest.
	 */
	CC_ATTR_HOTPLUG_DISABLED,
+
+	/**
+	 * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
+	 *
+	 * The host kernel is running with the necessary features
+	 * enabled to run SEV-SNP guests.
+	 */
+	CC_ATTR_HOST_SEV_SNP,
};
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
@@ -107,10 +115,14 @@ enum cc_attr {
 *
 * FALSE - Specified Confidential Computing attribute is not active
 */
bool cc_platform_has(enum cc_attr attr);
+void cc_platform_set(enum cc_attr attr);
+void cc_platform_clear(enum cc_attr attr);
#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */
static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+static inline void cc_platform_set(enum cc_attr attr) { }
+static inline void cc_platform_clear(enum cc_attr attr) { }
#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */
diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h
index 6355a36a3f81..b57118aaa679 100644
--- a/include/linux/cdx/cdx_bus.h
+++ b/include/linux/cdx/cdx_bus.h
@@ -12,6 +12,7 @@
#include <linux/device.h>
#include <linux/list.h>
#include <linux/mod_devicetable.h>
+#include <linux/msi.h>
#define MAX_CDX_DEV_RESOURCES	4
#define CDX_CONTROLLER_ID_SHIFT 4
@@ -21,13 +22,25 @@ struct cdx_controller;
enum {
+	CDX_DEV_MSI_CONF,
	CDX_DEV_BUS_MASTER_CONF,
	CDX_DEV_RESET_CONF,
+	CDX_DEV_MSI_ENABLE,
+};
+
+struct cdx_msi_config {
+	u64 addr;
+	u32 data;
+	u16 msi_index;
};
struct cdx_device_config {
	u8 type;
-	bool bus_master_enable;
+	union {
+		struct cdx_msi_config msi;
+		bool bus_master_enable;
+		bool msi_enable;
+	};
};
typedef int (*cdx_bus_enable_cb)(struct cdx_controller *cdx, u8 bus_num);
@@ -87,6 +100,7 @@ struct cdx_ops {
 * struct cdx_controller: CDX controller object
 * @dev: Linux device associated with the CDX controller.
 * @priv: private data
+ * @msi_domain: MSI domain
 * @id: Controller ID
 * @controller_registered: controller registered with bus
 * @ops: CDX controller ops
@@ -94,6 +108,7 @@ struct cdx_ops {
struct cdx_controller {
	struct device *dev;
	void *priv;
+	struct irq_domain *msi_domain;
	u32 id;
	bool controller_registered;
	struct cdx_ops *ops;
@@ -120,9 +135,13 @@ struct cdx_controller {
 * @req_id: Requestor ID associated with CDX device
 * @is_bus: Is this bus device
 * @enabled: is this bus enabled
+ * @msi_dev_id: MSI Device ID associated with CDX device
+ * @num_msi: Number of MSIs supported by the device
 * @driver_override: driver name to force a match; do not set directly,
 *                   because core frees it; use driver_set_override() to
 *                   set or clear it.
+ * @irqchip_lock: lock to synchronize irq/msi configuration
+ * @msi_write_pending: MSI write pending for this device
 */
struct cdx_device {
	struct device dev;
@@ -144,7 +163,11 @@ struct cdx_device {
	u32 req_id;
	bool is_bus;
	bool enabled;
+	u32 msi_dev_id;
+	u32 num_msi;
	const char *driver_override;
+	struct mutex irqchip_lock;
+	bool msi_write_pending;
};
#define to_cdx_device(_dev) \
@@ -237,4 +260,32 @@ int cdx_set_master(struct cdx_device *cdx_dev);
 */
int cdx_clear_master(struct cdx_device *cdx_dev);
+#ifdef CONFIG_GENERIC_MSI_IRQ
+/**
+ * cdx_enable_msi - Enable MSI for the CDX device.
+ * @cdx_dev: device pointer
+ *
+ * Return: 0 for success, -errno on failure
+ */
+int cdx_enable_msi(struct cdx_device *cdx_dev);
+
+/**
+ * cdx_disable_msi - Disable MSI for the CDX device.
+ * @cdx_dev: device pointer
+ */
+void cdx_disable_msi(struct cdx_device *cdx_dev);
+
+#else /* CONFIG_GENERIC_MSI_IRQ */
+
+static inline int cdx_enable_msi(struct cdx_device *cdx_dev)
+{
+	return -ENODEV;
+}
+
+static inline void cdx_disable_msi(struct cdx_device *cdx_dev)
+{
+}
+
+#endif /* CONFIG_GENERIC_MSI_IRQ */
+
#endif /* _CDX_BUS_H_ */
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
index 11a92a946016..5f904591fa5f 100644
--- a/include/linux/ceph/ceph_debug.h
+++ b/include/linux/ceph/ceph_debug.h
@@ -27,17 +27,13 @@
			     ##__VA_ARGS__)
#  else
/* faux printk call just to see any compiler warnings. */
-#  define dout(fmt, ...)	do {				\
-		if (0)						\
-			printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
-	} while (0)
-#  define doutc(client, fmt, ...)	do {			\
-		if (0)						\
-			printk(KERN_DEBUG "[%pU %llu] " fmt,	\
-			&client->fsid,				\
-			client->monc.auth->global_id,		\
-			##__VA_ARGS__);				\
-	} while (0)
+#  define dout(fmt, ...) \
+	no_printk(KERN_DEBUG fmt, ##__VA_ARGS__)
+#  define doutc(client, fmt, ...) \
+	no_printk(KERN_DEBUG "[%pU %llu] " fmt,	\
+		  &client->fsid,		\
+		  client->monc.auth->global_id,	\
+		  ##__VA_ARGS__)
#  endif
#else
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 34aaf0e87def..2150ca60394b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -690,7 +690,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
void cgroup_rstat_flush(struct cgroup *cgrp);
void cgroup_rstat_flush_hold(struct cgroup *cgrp);
-void cgroup_rstat_flush_release(void);
+void cgroup_rstat_flush_release(struct cgroup *cgrp);
/*
 * Basic resource stats.
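The cgroup_rstat_flush_release() change directly above means callers must now pass back the cgroup they flushed. A minimal sketch of the hold/read/release pattern under the updated prototype (the reader body is illustrative, not part of this diff):

	/* Flush and hold, read consistently, then release the same cgroup. */
	static void my_read_rstat(struct cgroup *cgrp)
	{
		cgroup_rstat_flush_hold(cgrp);	/* flush @cgrp's subtree and hold the rstat lock */
		/* ... read @cgrp's rstat-backed counters here ... */
		cgroup_rstat_flush_release(cgrp);	/* now takes the cgroup being released */
	}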
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 1293c38ddb7f..4a537260f655 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1084,18 +1084,28 @@ void of_fixed_factor_clk_setup(struct device_node *node); * @hw: handle between common and hardware-specific interfaces * @mult: multiplier * @div: divider + * @acc: fixed accuracy in ppb + * @flags: behavior modifying flags * * Clock with a fixed multiplier and divider. The output frequency is the * parent clock rate divided by div and multiplied by mult. - * Implements .recalc_rate, .set_rate and .round_rate + * Implements .recalc_rate, .set_rate, .round_rate and .recalc_accuracy + * + * Flags: + * * CLK_FIXED_FACTOR_FIXED_ACCURACY - Use the value in @acc instead of the + * parent clk accuracy. */ struct clk_fixed_factor { struct clk_hw hw; unsigned int mult; unsigned int div; + unsigned long acc; + unsigned int flags; }; +#define CLK_FIXED_FACTOR_FIXED_ACCURACY BIT(0) + #define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw) extern const struct clk_ops clk_fixed_factor_ops; @@ -1106,10 +1116,24 @@ void clk_unregister_fixed_factor(struct clk *clk); struct clk_hw *clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *clk_hw_register_fixed_factor_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div, + unsigned long acc); void clk_hw_unregister_fixed_factor(struct clk_hw *hw); struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div); +struct clk_hw *devm_clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev, + struct device_node *np, const char *name, const char *fw_name, + unsigned long flags, unsigned int mult, unsigned int div, + unsigned long acc); struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev, const char *name, unsigned int index, unsigned long flags, unsigned int mult, unsigned int div); diff --git a/include/linux/clk.h b/include/linux/clk.h index 06f1b292f8a0..0fa56d672532 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -202,6 +202,18 @@ bool clk_is_match(const struct clk *p, const struct clk *q); int clk_rate_exclusive_get(struct clk *clk); /** + * devm_clk_rate_exclusive_get - devm variant of clk_rate_exclusive_get + * @dev: device the exclusivity is bound to + * @clk: clock source + * + * Calls clk_rate_exclusive_get() on @clk and registers a devm cleanup handler + * on @dev to call clk_rate_exclusive_put(). + * + * Must not be called from within atomic context. 
+ */
+int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk);
+
/**
 * clk_rate_exclusive_put - release exclusivity over the rate control of a
 *                          producer
 * @clk: clock source
@@ -274,6 +286,11 @@ static inline int clk_rate_exclusive_get(struct clk *clk)
	return 0;
}
+static inline int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk)
+{
+	return 0;
+}
+
static inline void clk_rate_exclusive_put(struct clk *clk) {}
#endif
@@ -479,6 +496,22 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
				       struct clk_bulk_data **clks);
/**
+ * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed)
+ * @dev: device for clock "consumer"
+ * @clks: pointer to the clk_bulk_data table of consumer
+ *
+ * Returns success (0) or negative errno.
+ *
+ * This managed helper allows drivers to get all clocks of the
+ * consumer and enable them in one operation.
+ * The clks will automatically be disabled and freed when the device
+ * is unbound.
+ */
+int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
					      struct clk_bulk_data **clks);
+
/**
 * devm_clk_get - lookup and obtain a managed reference to a clock producer.
 * @dev: device for clock "consumer"
 * @id: clock consumer ID
@@ -968,6 +1001,12 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev,
	return 0;
}
+static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+							    struct clk_bulk_data **clks)
+{
+	return 0;
+}
+
static inline struct clk *devm_get_clk_from_child(struct device *dev,
				struct device_node *np, const char *con_id)
{
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index cbfcbf186ce3..e656f63efdce 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -13,11 +13,14 @@
/**
 * struct clk_omap_reg - OMAP register declaration
 * @offset: offset from the master IP module base address
+ * @bit: register bit offset
 * @index: index of the master IP module
+ * @flags: flags
 */
struct clk_omap_reg {
	void __iomem *ptr;
	u16 offset;
+	u8 bit;
	u8 index;
	u8 flags;
};
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 1d42d4b17327..0ad8b550bb4b 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -291,7 +291,19 @@ static inline void timer_probe(void) {}
#define TIMER_ACPI_DECLARE(name, table_id, fn)		\
	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
-extern ulong max_cswd_read_retries;
+static inline unsigned int clocksource_get_max_watchdog_retry(void)
+{
+	/*
+	 * When the system is in the boot phase or under heavy workload, there
+	 * can be random big latencies during the clocksource/watchdog
+	 * read, so allow retries to filter the noise latency. As the
+	 * latency's frequency and maximum value go up with the number of
+	 * CPUs, scale the number of retries with the number of online
+	 * CPUs.
+	 */
+	return (ilog2(num_online_cpus()) / 2) + 1;
+}
+
void clocksource_verify_percpu(struct clocksource *cs);
#endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h
index 16775d7d8f8d..a4fa3436940c 100644
--- a/include/linux/clocksource_ids.h
+++ b/include/linux/clocksource_ids.h
@@ -6,6 +6,9 @@
enum clocksource_ids {
	CSID_GENERIC		= 0,
	CSID_ARM_ARCH_COUNTER,
+	CSID_X86_TSC_EARLY,
+	CSID_X86_TSC,
+	CSID_X86_KVM_CLK,
	CSID_MAX,
};
diff --git a/include/linux/closure.h b/include/linux/closure.h
index c554c6a08768..99155df162d0 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -194,6 +194,18 @@ static inline void closure_sync(struct closure *cl)
		__closure_sync(cl);
}
+int __closure_sync_timeout(struct closure *cl, unsigned long timeout);
+
+static inline int closure_sync_timeout(struct closure *cl, unsigned long timeout)
+{
+#ifdef CONFIG_DEBUG_CLOSURES
+	BUG_ON(closure_nr_remaining(cl) != 1 && !cl->closure_get_happened);
+#endif
+	return cl->closure_get_happened
+		? __closure_sync_timeout(cl, timeout)
+		: 0;
+}
+
#ifdef CONFIG_DEBUG_CLOSURES
void closure_debug_create(struct closure *cl);
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 63873b93deaa..9db877506ea8 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -6,12 +6,8 @@
#include <linux/types.h>
#include <linux/numa.h>
-/*
- * There is always at least global CMA area and a few optional
- * areas configured in kernel .config.
- */
#ifdef CONFIG_CMA_AREAS
-#define MAX_CMA_AREAS	(1 + CONFIG_CMA_AREAS)
+#define MAX_CMA_AREAS	CONFIG_CMA_AREAS
#endif
#define CMA_MAX_NAME 64
diff --git a/include/linux/cmpxchg-emu.h b/include/linux/cmpxchg-emu.h
new file mode 100644
index 000000000000..998deec67740
--- /dev/null
+++ b/include/linux/cmpxchg-emu.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Emulated 1-byte cmpxchg operation for architectures lacking direct
+ * support for this size. It is implemented in terms of 4-byte cmpxchg
+ * operations.
+ *
+ * Copyright (C) 2024 Paul E. McKenney.
+ */
+
+#ifndef __LINUX_CMPXCHG_EMU_H
+#define __LINUX_CMPXCHG_EMU_H
+
+uintptr_t cmpxchg_emu_u8(volatile u8 *p, uintptr_t old, uintptr_t new);
+
+#endif /* __LINUX_CMPXCHG_EMU_H */
diff --git a/include/linux/codetag.h b/include/linux/codetag.h
new file mode 100644
index 000000000000..c2a579ccd455
--- /dev/null
+++ b/include/linux/codetag.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * code tagging framework
+ */
+#ifndef _LINUX_CODETAG_H
+#define _LINUX_CODETAG_H
+
+#include <linux/types.h>
+
+struct codetag_iterator;
+struct codetag_type;
+struct codetag_module;
+struct seq_buf;
+struct module;
+
+/*
+ * An instance of this structure is created in a special ELF section at every
+ * code location being tagged.  At runtime, the special section is treated as
+ * an array of these.
+ */ +struct codetag { + unsigned int flags; /* used in later patches */ + unsigned int lineno; + const char *modname; + const char *function; + const char *filename; +} __aligned(8); + +union codetag_ref { + struct codetag *ct; +}; + +struct codetag_type_desc { + const char *section; + size_t tag_size; + void (*module_load)(struct codetag_type *cttype, + struct codetag_module *cmod); + bool (*module_unload)(struct codetag_type *cttype, + struct codetag_module *cmod); +}; + +struct codetag_iterator { + struct codetag_type *cttype; + struct codetag_module *cmod; + unsigned long mod_id; + struct codetag *ct; +}; + +#ifdef MODULE +#define CT_MODULE_NAME KBUILD_MODNAME +#else +#define CT_MODULE_NAME NULL +#endif + +#define CODE_TAG_INIT { \ + .modname = CT_MODULE_NAME, \ + .function = __func__, \ + .filename = __FILE__, \ + .lineno = __LINE__, \ + .flags = 0, \ +} + +void codetag_lock_module_list(struct codetag_type *cttype, bool lock); +bool codetag_trylock_module_list(struct codetag_type *cttype); +struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype); +struct codetag *codetag_next_ct(struct codetag_iterator *iter); + +void codetag_to_text(struct seq_buf *out, struct codetag *ct); + +struct codetag_type * +codetag_register_type(const struct codetag_type_desc *desc); + +#if defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) +void codetag_load_module(struct module *mod); +bool codetag_unload_module(struct module *mod); +#else +static inline void codetag_load_module(struct module *mod) {} +static inline bool codetag_unload_module(struct module *mod) { return true; } +#endif + +#endif /* _LINUX_CODETAG_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ddab1ef22bee..4c1a39dcb624 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -9,7 +9,7 @@ * Clang prior to 17 is being silly and considers many __cleanup() variables * as unused (because they are, their sole purpose is to go out of scope). * - * https://reviews.llvm.org/D152180 + * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61 */ #undef __cleanup #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func))) @@ -114,11 +114,17 @@ #define __diag_str(s) __diag_str1(s) #define __diag(s) _Pragma(__diag_str(clang diagnostic s)) -#if CONFIG_CLANG_VERSION >= 110000 -#define __diag_clang_11(s) __diag(s) -#else -#define __diag_clang_11(s) -#endif +#define __diag_clang_13(s) __diag(s) #define __diag_ignore_all(option, comment) \ - __diag_clang(11, ignore, option) + __diag_clang(13, ignore, option) + +/* + * clang has horrible behavior with "g" or "rm" constraints for asm + * inputs, turning them into something worse than "m". 
Avoid using + * constraints with multiple possible uses (but "ir" seems to be ok): + * + * https://github.com/llvm/llvm-project/issues/20571 + */ +#define ASM_INPUT_G "ir" +#define ASM_INPUT_RM "r" diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 75bd1692d2e3..aff92b1d284f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -35,7 +35,7 @@ (typeof(ptr)) (__ptr + (off)); \ }) -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE #define __noretpoline __attribute__((__indirect_branch__("keep"))) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index bb1339c7057b..8c252e073bd8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -116,6 +116,14 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n +#define __annotate_reachable(c) ({ \ + asm volatile(__stringify_label(c) ":\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long " __stringify_label(c) "b - .\n\t" \ + ".popsection\n\t"); \ +}) +#define annotate_reachable() __annotate_reachable(__COUNTER__) + #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -128,6 +136,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __annotate_jump_table __section(".rodata..c_jump_table") #else /* !CONFIG_OBJTOOL */ +#define annotate_reachable() #define annotate_unreachable() #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ @@ -209,7 +218,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define ___ADDRESSABLE(sym, __attrs) \ static void * __used __attrs \ - __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)&sym; + __UNIQUE_ID(__PASTE(__addressable_,sym)) = (void *)(uintptr_t)&sym; #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) @@ -231,6 +240,45 @@ static inline void *offset_to_ptr(const int *off) * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + * + * Details: + * - sizeof() return an integer constant expression, and does not evaluate + * the value of its operand; it only examines the type of its operand. + * - The results of comparing two integer constant expressions is also + * an integer constant expression. + * - The first literal "8" isn't important. It could be any literal value. + * - The second literal "8" is to avoid warnings about unaligned pointers; + * this could otherwise just be "1". + * - (long)(x) is used to avoid warnings about 64-bit types on 32-bit + * architectures. + * - The C Standard defines "null pointer constant", "(void *)0", as + * distinct from other void pointers. + * - If (x) is an integer constant expression, then the "* 0l" resolves + * it into an integer constant expression of value 0. Since it is cast to + * "void *", this makes the second operand a null pointer constant. + * - If (x) is not an integer constant expression, then the second operand + * resolves to a void pointer (but not a null pointer constant: the value + * is not an integer constant 0). + * - The conditional operator's third operand, "(int *)8", is an object + * pointer (to type "int"). + * - The behavior (including the return type) of the conditional operator + * ("operand1 ? 
operand2 : operand3") depends on the kind of expressions + * given for the second and third operands. This is the central mechanism + * of the macro: + * - When one operand is a null pointer constant (i.e. when x is an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns the type of the + * object pointer operand (i.e. "int *"). Here, within the sizeof(), we + * would then get: + * sizeof(*((int *)(...)) == sizeof(int) == 4 + * - When one operand is a void pointer (i.e. when x is not an integer + * constant expression) and the other is an object pointer (i.e. our + * third operand), the conditional operator returns a "void *" type. + * Here, within the sizeof(), we would then get: + * sizeof(*((void *)(...)) == sizeof(void) == 1 + * - The equality comparison to "sizeof(int)" therefore depends on (x): + * sizeof(int) == sizeof(int) (x) was a constant expression + * sizeof(int) != sizeof(void) (x) was not a constant expression */ #define __is_constexpr(x) \ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 28566624f008..32284cd26d52 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -95,11 +95,11 @@ #endif /* - * Optional: only supported since gcc >= 14 + * Optional: only supported since gcc >= 15 * Optional: only supported since clang >= 18 * * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896 - * clang: https://reviews.llvm.org/D148381 + * clang: https://github.com/llvm/llvm-project/pull/76348 */ #if __has_attribute(__counted_by__) # define __counted_by(member) __attribute__((__counted_by__(member))) @@ -334,6 +334,18 @@ #define __section(section) __attribute__((__section__(section))) /* + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-uninitialized-variable-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#uninitialized + */ +#if __has_attribute(__uninitialized__) +# define __uninitialized __attribute__((__uninitialized__)) +#else +# define __uninitialized +#endif + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute @@ -350,6 +362,19 @@ #define __used __attribute__((__used__)) /* + * The __used attribute guarantees that the attributed variable will be + * always emitted by a compiler. It doesn't prevent the compiler from + * throwing 'unused' warnings when it can't detect how the variable is + * actually used. It's a compiler implementation details either emit + * the warning in that case or not. + * + * The combination of both 'used' and 'unused' attributes ensures that + * the variable would be emitted, and will not trigger 'unused' warnings. + * The attribute is applicable for functions, static and global variables. 
+ */ +#define __always_used __used __maybe_unused + +/* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 0caf354cb94b..93600de3800b 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -99,17 +99,17 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute * * When -falign-functions=N is in use, we must avoid the cold attribute as - * contemporary versions of GCC drop the alignment for cold functions. Worse, - * GCC can implicitly mark callees of cold functions as cold themselves, so - * it's not sufficient to add __function_aligned here as that will not ensure - * that callees are correctly aligned. + * GCC drops the alignment for cold functions. Worse, GCC can implicitly mark + * callees of cold functions as cold themselves, so it's not sufficient to add + * __function_aligned here as that will not ensure that callees are correctly + * aligned. * * See: * * https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9 */ -#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0) +#if defined(CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT) || (CONFIG_FUNCTION_ALIGNMENT == 0) #define __cold __attribute__((__cold__)) #else #define __cold @@ -273,20 +273,56 @@ struct ftrace_likely_data { * disable all instrumentation. See Kconfig.kcsan where this is mandatory. */ # define __no_kcsan __no_sanitize_thread __disable_sanitizer_instrumentation +/* + * Type qualifier to mark variables where all data-racy accesses should be + * ignored by KCSAN. Note, the implementation simply marks these variables as + * volatile, since KCSAN will treat such accesses as "marked". + */ +# define __data_racy volatile # define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused #else # define __no_kcsan +# define __data_racy +#endif + +#ifdef __SANITIZE_MEMORY__ +/* + * Similarly to KASAN and KCSAN, KMSAN loses function attributes of inlined + * functions, therefore disabling KMSAN checks also requires disabling inlining. + * + * __no_sanitize_or_inline effectively prevents KMSAN from reporting errors + * within the function and marks all its outputs as initialized. + */ +# define __no_sanitize_or_inline __no_kmsan_checks notrace __maybe_unused #endif #ifndef __no_sanitize_or_inline #define __no_sanitize_or_inline __always_inline #endif +/* + * Apply __counted_by() when the Endianness matches to increase test coverage. + */ +#ifdef __LITTLE_ENDIAN +#define __counted_by_le(member) __counted_by(member) +#define __counted_by_be(member) +#else +#define __counted_by_le(member) +#define __counted_by_be(member) __counted_by(member) +#endif + +/* Do not trap wrapping arithmetic within an annotated function. 
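As a sketch of the endianness-aware __counted_by variants introduced above (struct and field names are invented), a wire-format structure whose length field is explicitly little-endian could be annotated so the bounds information applies only on matching builds:

	struct demo_pkt {
		__le16 len;			/* element count, little-endian on the wire */
		u8 data[] __counted_by_le(len);	/* checked on little-endian builds only */
	};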
*/ +#ifdef CONFIG_UBSAN_SIGNED_WRAP +# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) +#else +# define __signed_wrap +#endif + /* Section for code which can't be instrumented at all */ #define __noinstr_section(section) \ noinline notrace __attribute((__section__(section))) \ __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \ - __no_sanitize_memory + __no_sanitize_memory __signed_wrap #define noinstr __noinstr_section(".noinstr.text") @@ -373,6 +409,15 @@ struct ftrace_likely_data { #define asm_goto_output(x...) asm volatile goto(x) #endif +/* + * Clang has trouble with constraints with multiple + * alternative behaviors (mainly "g" and "rm"). + */ +#ifndef ASM_INPUT_G + #define ASM_INPUT_G "g" + #define ASM_INPUT_RM "rm" +#endif + #ifdef CONFIG_CC_HAS_ASM_INLINE #define asm_inline asm __inline #else diff --git a/include/linux/console.h b/include/linux/console.h index 779d388af8a0..31a8f5b85f5d 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -18,6 +18,7 @@ #include <linux/bits.h> #include <linux/rculist.h> #include <linux/types.h> +#include <linux/vesa.h> struct vc_data; struct console_font_op; @@ -36,63 +37,91 @@ enum vc_intensity; /** * struct consw - callbacks for consoles * + * @owner: the module to get references of when this console is used + * @con_startup: set up the console and return its name (like VGA, EGA, ...) + * @con_init: initialize the console on @vc. @init is true for the very first + * call on this @vc. + * @con_deinit: deinitialize the console from @vc. + * @con_clear: erase @count characters at [@x, @y] on @vc. @count >= 1. + * @con_putc: emit one character with attributes @ca to [@x, @y] on @vc. + * (optional -- @con_putcs would be called instead) + * @con_putcs: emit @count characters with attributes @s to [@x, @y] on @vc. + * @con_cursor: enable/disable cursor depending on @enable * @con_scroll: move lines from @top to @bottom in direction @dir by @lines. * Return true if no generic handling should be done. * Invoked by csi_M and printing to the console. - * @con_set_palette: sets the palette of the console to @table (optional) + * @con_switch: notifier about the console switch; it is supposed to return + * true if a redraw is needed. + * @con_blank: blank/unblank the console. The target mode is passed in @blank. + * @mode_switch is set if changing from/to text/graphics. The hook + * is supposed to return true if a redraw is needed. + * @con_font_set: set console @vc font to @font with height @vpitch. @flags can + * be %KD_FONT_FLAG_DONT_RECALC. (optional) + * @con_font_get: fetch the current font on @vc of height @vpitch into @font. + * (optional) + * @con_font_default: set default font on @vc. @name can be %NULL or font name + * to search for. @font can be filled back. (optional) + * @con_resize: resize the @vc console to @width x @height. @from_user is true + * when this change comes from the user space. + * @con_set_palette: sets the palette of the console @vc to @table (optional) * @con_scrolldelta: the contents of the console should be scrolled by @lines. * Invoked by user. (optional) + * @con_set_origin: set origin (see &vc_data::vc_origin) of the @vc. If not + * provided or returns false, the origin is set to + * @vc->vc_screenbuf. (optional) + * @con_save_screen: save screen content into @vc->vc_screenbuf. Called e.g. + * upon entering graphics. (optional) + * @con_build_attr: build attributes based on @color, @intensity and other + * parameters. 
The result is used for both normal and erase + * characters. (optional) + * @con_invert_region: invert a region of length @count on @vc starting at @p. + * (optional) + * @con_debug_enter: prepare the console for the debugger. This includes, but + * is not limited to, unblanking the console, loading an + * appropriate palette, and allowing debugger generated output. + * (optional) + * @con_debug_leave: restore the console to its pre-debug state as closely as + * possible. (optional) */ struct consw { struct module *owner; const char *(*con_startup)(void); - void (*con_init)(struct vc_data *vc, int init); + void (*con_init)(struct vc_data *vc, bool init); void (*con_deinit)(struct vc_data *vc); - void (*con_clear)(struct vc_data *vc, int sy, int sx, int height, - int width); - void (*con_putc)(struct vc_data *vc, int c, int ypos, int xpos); - void (*con_putcs)(struct vc_data *vc, const unsigned short *s, - int count, int ypos, int xpos); - void (*con_cursor)(struct vc_data *vc, int mode); + void (*con_clear)(struct vc_data *vc, unsigned int y, + unsigned int x, unsigned int count); + void (*con_putc)(struct vc_data *vc, u16 ca, unsigned int y, + unsigned int x); + void (*con_putcs)(struct vc_data *vc, const u16 *s, + unsigned int count, unsigned int ypos, + unsigned int xpos); + void (*con_cursor)(struct vc_data *vc, bool enable); bool (*con_scroll)(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int lines); - int (*con_switch)(struct vc_data *vc); - int (*con_blank)(struct vc_data *vc, int blank, int mode_switch); - int (*con_font_set)(struct vc_data *vc, struct console_font *font, - unsigned int vpitch, unsigned int flags); + bool (*con_switch)(struct vc_data *vc); + bool (*con_blank)(struct vc_data *vc, enum vesa_blank_mode blank, + bool mode_switch); + int (*con_font_set)(struct vc_data *vc, + const struct console_font *font, + unsigned int vpitch, unsigned int flags); int (*con_font_get)(struct vc_data *vc, struct console_font *font, unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, - struct console_font *font, char *name); + struct console_font *font, const char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, - unsigned int height, unsigned int user); + unsigned int height, bool from_user); void (*con_set_palette)(struct vc_data *vc, const unsigned char *table); void (*con_scrolldelta)(struct vc_data *vc, int lines); - int (*con_set_origin)(struct vc_data *vc); + bool (*con_set_origin)(struct vc_data *vc); void (*con_save_screen)(struct vc_data *vc); u8 (*con_build_attr)(struct vc_data *vc, u8 color, enum vc_intensity intensity, bool blink, bool underline, bool reverse, bool italic); void (*con_invert_region)(struct vc_data *vc, u16 *p, int count); - u16 *(*con_screen_pos)(const struct vc_data *vc, int offset); - unsigned long (*con_getxy)(struct vc_data *vc, unsigned long position, - int *px, int *py); - /* - * Flush the video console driver's scrollback buffer - */ - void (*con_flush_scrollback)(struct vc_data *vc); - /* - * Prepare the console for the debugger. This includes, but is not - * limited to, unblanking the console, loading an appropriate - * palette, and allowing debugger generated output. - */ - int (*con_debug_enter)(struct vc_data *vc); - /* - * Restore the console to its pre-debug state as closely as possible. 
- */ - int (*con_debug_leave)(struct vc_data *vc); + void (*con_debug_enter)(struct vc_data *vc); + void (*con_debug_leave)(struct vc_data *vc); }; extern const struct consw *conswitchp; @@ -112,32 +141,21 @@ int con_is_bound(const struct consw *csw); int do_unregister_con_driver(const struct consw *csw); int do_take_over_console(const struct consw *sw, int first, int last, int deflt); void give_up_console(const struct consw *sw); -#ifdef CONFIG_HW_CONSOLE -int con_debug_enter(struct vc_data *vc); -int con_debug_leave(void); +#ifdef CONFIG_VT +void con_debug_enter(struct vc_data *vc); +void con_debug_leave(void); #else -static inline int con_debug_enter(struct vc_data *vc) -{ - return 0; -} -static inline int con_debug_leave(void) -{ - return 0; -} +static inline void con_debug_enter(struct vc_data *vc) { } +static inline void con_debug_leave(void) { } #endif -/* cursor */ -#define CM_DRAW (1) -#define CM_ERASE (2) -#define CM_MOVE (3) - /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) */ /** - * cons_flags - General console flags + * enum cons_flags - General console flags * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate * output of messages that were already shown by boot * consoles or read by userspace via syslog() syscall. @@ -218,7 +236,7 @@ struct nbcon_state { static_assert(sizeof(struct nbcon_state) <= sizeof(int)); /** - * nbcon_prio - console owner priority for nbcon consoles + * enum nbcon_prio - console owner priority for nbcon consoles * @NBCON_PRIO_NONE: Unused * @NBCON_PRIO_NORMAL: Normal (non-emergency) usage * @NBCON_PRIO_EMERGENCY: Emergency output (WARN/OOPS...) @@ -450,7 +468,7 @@ static inline bool console_is_registered(const struct console *con) * for_each_console() - Iterator over registered consoles * @con: struct console pointer used as loop cursor * - * The console list and the console->flags are immutable while iterating. + * The console list and the &console.flags are immutable while iterating. * * Requires console_list_lock to be held. */ @@ -520,12 +538,6 @@ void vcs_remove_sysfs(int index); */ extern atomic_t ignore_console_lock_warning; -/* VESA Blanking Levels */ -#define VESA_NO_BLANKING 0 -#define VESA_VSYNC_SUSPEND 1 -#define VESA_HSYNC_SUSPEND 2 -#define VESA_POWERDOWN 3 - extern void console_init(void); /* For deferred console takeover */ diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 539f1cd45309..20f564e98552 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -151,7 +151,6 @@ struct vc_data { DECLARE_BITMAP(vc_tab_stop, VC_TABSTOPS_COUNT); /* Tab stops. 256 columns. */ unsigned char vc_palette[16*3]; /* Colour palette for VGA+ */ unsigned short * vc_translate; - unsigned int vc_resize_user; /* resize request from user */ unsigned int vc_bell_pitch; /* Console bell pitch */ unsigned int vc_bell_duration; /* Console bell duration */ unsigned short vc_cur_blink_ms; /* Cursor blink duration */ diff --git a/include/linux/coredump.h b/include/linux/coredump.h index d3eba4360150..0904ba010341 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -30,6 +30,8 @@ struct coredump_params { struct core_vma_metadata *vma_meta; }; +extern unsigned int core_file_note_size_limit; + /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. 
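Given the consw changes above (int hooks becoming bool or void, signed coordinates becoming unsigned), a console driver conversion might look roughly like this minimal sketch; the demo_* names and hardware helpers are invented:

	static void demo_con_cursor(struct vc_data *vc, bool enable)
	{
		/* 'enable' replaces the old CM_DRAW/CM_ERASE/CM_MOVE codes */
		if (enable)
			demo_hw_draw_cursor(vc->state.x, vc->state.y);
		else
			demo_hw_hide_cursor();
	}

	static bool demo_con_switch(struct vc_data *vc)
	{
		return true;	/* console contents need a redraw */
	}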
diff --git a/include/linux/coresight.h b/include/linux/coresight.h index a4cb7dd6ca23..f09ace92176e 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -12,6 +12,7 @@ #include <linux/io.h> #include <linux/perf_event.h> #include <linux/sched.h> +#include <linux/platform_device.h> /* Peripheral id registers (0xFD0-0xFEC) */ #define CORESIGHT_PERIPHIDR4 0xfd0 @@ -35,7 +36,7 @@ #define CORESIGHT_UNLOCK 0xc5acce55 -extern struct bus_type coresight_bustype; +extern const struct bus_type coresight_bustype; enum coresight_dev_type { CORESIGHT_DEV_TYPE_SINK, @@ -226,13 +227,26 @@ struct coresight_sysfs_link { * by @coresight_ops. * @access: Device i/o access abstraction for this device. * @dev: The device entity associated to this component. - * @refcnt: keep track of what is in use. + * @mode: This tracer's mode, i.e sysFS, Perf or disabled. This is + * actually an 'enum cs_mode', but is stored in an atomic type. + * This is always accessed through local_read() and local_set(), + * but wherever it's done from within the Coresight device's lock, + * a non-atomic read would also work. This is the main point of + * synchronisation between code happening inside the sysfs mode's + * coresight_mutex and outside when running in Perf mode. A compare + * and exchange swap is done to atomically claim one mode or the + * other. + * @refcnt: keep track of what is in use. Only access this outside of the + * device's spinlock when the coresight_mutex held and mode == + * CS_MODE_SYSFS. Otherwise it must be accessed from inside the + * spinlock. * @orphan: true if the component has connections that haven't been linked. - * @enable: 'true' if component is currently part of an active path. - * @activated: 'true' only if a _sink_ has been activated. A sink can be - * activated but not yet enabled. Enabling for a _sink_ - * happens when a source has been selected and a path is enabled - * from source to that sink. + * @sysfs_sink_activated: 'true' when a sink has been selected for use via sysfs + * by writing a 1 to the 'enable_sink' file. A sink can be + * activated but not yet enabled. Enabling for a _sink_ happens + * when a source has been selected and a path is enabled from + * source to that sink. A sink can also become enabled but not + * activated if it's used via Perf. * @ea: Device attribute for sink representation under PMU directory. * @def_sink: cached reference to default sink found for this device. * @nr_links: number of sysfs links created to other components from this @@ -250,11 +264,11 @@ struct coresight_device { const struct coresight_ops *ops; struct csdev_access access; struct device dev; - atomic_t refcnt; + local_t mode; + int refcnt; bool orphan; - bool enable; /* true only if configured as part of a path */ /* sink specific fields */ - bool activated; /* true only if a sink is part of a path */ + bool sysfs_sink_activated; struct dev_ext_attribute *ea; struct coresight_device *def_sink; /* sysfs links between components */ @@ -378,8 +392,6 @@ struct coresight_ops { const struct coresight_ops_helper *helper_ops; }; -#if IS_ENABLED(CONFIG_CORESIGHT) - static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, u32 offset) { @@ -569,11 +581,43 @@ static inline bool coresight_is_percpu_sink(struct coresight_device *csdev) (csdev->subtype.sink_subtype == CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM); } +/* + * Atomically try to take the device and set a new mode. Returns true on + * success, false if the device is already taken by someone else. 
+ */ +static inline bool coresight_take_mode(struct coresight_device *csdev, + enum cs_mode new_mode) +{ + return local_cmpxchg(&csdev->mode, CS_MODE_DISABLED, new_mode) == + CS_MODE_DISABLED; +} + +static inline enum cs_mode coresight_get_mode(struct coresight_device *csdev) +{ + return local_read(&csdev->mode); +} + +static inline void coresight_set_mode(struct coresight_device *csdev, + enum cs_mode new_mode) +{ + enum cs_mode current_mode = coresight_get_mode(csdev); + + /* + * Changing to a new mode must be done from an already disabled state + * unless it's synchronized with coresight_take_mode(). Otherwise the + * device is already in use and signifies a locking issue. + */ + WARN(new_mode != CS_MODE_DISABLED && current_mode != CS_MODE_DISABLED && + current_mode != new_mode, "Device already in use\n"); + + local_set(&csdev->mode, new_mode); +} + extern struct coresight_device * coresight_register(struct coresight_desc *desc); extern void coresight_unregister(struct coresight_device *csdev); -extern int coresight_enable(struct coresight_device *csdev); -extern void coresight_disable(struct coresight_device *csdev); +extern int coresight_enable_sysfs(struct coresight_device *csdev); +extern void coresight_disable_sysfs(struct coresight_device *csdev); extern int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); @@ -598,83 +642,6 @@ void coresight_relaxed_write64(struct coresight_device *csdev, u64 val, u32 offset); void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); -#else -static inline struct coresight_device * -coresight_register(struct coresight_desc *desc) { return NULL; } -static inline void coresight_unregister(struct coresight_device *csdev) {} -static inline int -coresight_enable(struct coresight_device *csdev) { return -ENOSYS; } -static inline void coresight_disable(struct coresight_device *csdev) {} - -static inline int coresight_timeout(struct csdev_access *csa, u32 offset, - int position, int value) -{ - return 1; -} - -static inline int coresight_claim_device_unlocked(struct coresight_device *csdev) -{ - return -EINVAL; -} - -static inline int coresight_claim_device(struct coresight_device *csdev) -{ - return -EINVAL; -} - -static inline void coresight_disclaim_device(struct coresight_device *csdev) {} -static inline void coresight_disclaim_device_unlocked(struct coresight_device *csdev) {} - -static inline bool coresight_loses_context_with_cpu(struct device *dev) -{ - return false; -} - -static inline u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline u32 coresight_read32(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset) -{ -} - -static inline void coresight_relaxed_write32(struct coresight_device *csdev, - u32 val, u32 offset) -{ -} - -static inline u64 coresight_relaxed_read64(struct coresight_device *csdev, - u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline u64 coresight_read64(struct coresight_device *csdev, u32 offset) -{ - WARN_ON_ONCE(1); - return 0; -} - -static inline void coresight_relaxed_write64(struct coresight_device *csdev, - u64 val, u32 offset) -{ -} - -static inline void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset) -{ -} - -#endif /* IS_ENABLED(CONFIG_CORESIGHT) */ - extern int coresight_get_cpu(struct device *dev); struct coresight_platform_data 
*coresight_get_platform_data(struct device *dev); @@ -692,4 +659,9 @@ coresight_find_output_type(struct coresight_platform_data *pdata, enum coresight_dev_type type, union coresight_dev_subtype subtype); +int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, + struct platform_driver *pdev_drv); + +void coresight_remove_driver(struct amba_driver *amba_drv, + struct platform_driver *pdev_drv); #endif /* _LINUX_COREISGHT_H */ diff --git a/include/linux/counter.h b/include/linux/counter.h index 702e9108bbb4..426b7d58a438 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -6,14 +6,15 @@ #ifndef _COUNTER_H_ #define _COUNTER_H_ +#include <linux/array_size.h> #include <linux/cdev.h> #include <linux/device.h> -#include <linux/kernel.h> #include <linux/kfifo.h> #include <linux/mutex.h> #include <linux/spinlock_types.h> #include <linux/types.h> #include <linux/wait.h> + #include <uapi/linux/counter.h> struct counter_device; @@ -359,7 +360,6 @@ struct counter_ops { * @num_counts: number of Counts specified in @counts * @ext: optional array of Counter device extensions * @num_ext: number of Counter device extensions specified in @ext - * @priv: optional private data supplied by driver * @dev: internal device structure * @chrdev: internal character device structure * @events_list: list of current watching Counter events @@ -602,6 +602,9 @@ struct counter_array { #define COUNTER_COMP_FLOOR(_read, _write) \ COUNTER_COMP_COUNT_U64("floor", _read, _write) +#define COUNTER_COMP_FREQUENCY(_read) \ + COUNTER_COMP_SIGNAL_U64("frequency", _read, NULL) + #define COUNTER_COMP_POLARITY(_read, _write, _available) \ { \ .type = COUNTER_COMP_SIGNAL_POLARITY, \ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index dcb89c987164..861c3bfc5f17 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -75,6 +75,8 @@ extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, @@ -112,7 +114,7 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); -void bringup_nonboot_cpus(unsigned int setup_max_cpus); +void bringup_nonboot_cpus(unsigned int max_cpus); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -128,7 +130,7 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ -extern struct bus_type cpu_subsys; +extern const struct bus_type cpu_subsys; extern int lockdep_is_cpus_held(void); @@ -196,6 +198,8 @@ void arch_cpu_idle(void); void arch_cpu_idle_prepare(void); void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); +void arch_tick_broadcast_enter(void); +void arch_tick_broadcast_exit(void); void __noreturn arch_cpu_idle_dead(void); #ifdef CONFIG_ARCH_HAS_CPU_FINALIZE_INIT @@ -217,7 +221,18 @@ void cpuhp_report_idle_dead(void); static inline void cpuhp_report_idle_dead(void) { } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_CPU_MITIGATIONS extern bool cpu_mitigations_off(void); extern bool cpu_mitigations_auto_nosmt(void); +#else +static inline bool cpu_mitigations_off(void) +{ + return true; +} +static inline bool 
cpu_mitigations_auto_nosmt(void) +{ + return false; +} +#endif #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index afda5f24d3dd..20f7e98ee8af 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -241,6 +241,12 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy); void cpufreq_enable_fast_switch(struct cpufreq_policy *policy); void cpufreq_disable_fast_switch(struct cpufreq_policy *policy); bool has_target_index(void); + +DECLARE_PER_CPU(unsigned long, cpufreq_pressure); +static inline unsigned long cpufreq_get_pressure(int cpu) +{ + return READ_ONCE(per_cpu(cpufreq_pressure, cpu)); +} #else static inline unsigned int cpufreq_get(unsigned int cpu) { @@ -263,6 +269,11 @@ static inline bool cpufreq_supports_freq_invariance(void) return false; } static inline void disable_cpufreq(void) { } +static inline void cpufreq_update_limits(unsigned int cpu) { } +static inline unsigned long cpufreq_get_pressure(int cpu) +{ + return 0; +} #endif #ifdef CONFIG_CPU_FREQ_STAT @@ -568,9 +579,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, /* * The polling frequency depends on the capability of the processor. Default - * polling frequency is 1000 times the transition latency of the processor. The - * ondemand governor will work on any processor with transition latency <= 10ms, - * using appropriate sampling rate. + * polling frequency is 1000 times the transition latency of the processor. */ #define LATENCY_MULTIPLIER (1000) @@ -694,26 +703,6 @@ struct cpufreq_frequency_table { * order */ }; -#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) -int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table **table); -#else -static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table - **table) -{ - return -EINVAL; -} - -static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, - struct cpufreq_frequency_table - **table) -{ -} -#endif - /* * cpufreq_for_each_entry - iterate over a cpufreq_frequency_table * @pos: the cpufreq_frequency_table * to use as a loop cursor. 
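As a usage sketch for the new per-CPU frequency pressure accessor (the helper name demo_max_pressure is invented):

	static unsigned long demo_max_pressure(const struct cpumask *cpus)
	{
		unsigned long pressure = 0;
		int cpu;

		/* cpufreq_get_pressure() returns 0 when CONFIG_CPU_FREQ is off */
		for_each_cpu(cpu, cpus)
			pressure = max(pressure, cpufreq_get_pressure(cpu));

		return pressure;
	}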
@@ -1021,6 +1010,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +{ + unsigned int freq; + + if (idx < 0) + return false; + + freq = policy->freq_table[idx].frequency; + + return freq == clamp_val(freq, policy->min, policy->max); +} + static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -1054,7 +1055,8 @@ retry: return 0; } - if (idx < 0 && efficiencies) { + /* Limit frequency index to honor policy->min/max */ + if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { efficiencies = false; goto retry; } @@ -1149,8 +1151,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ if (ret < 0) continue; - if (pargs->np == args.np && pargs->args_count == args.args_count && - !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) + if (of_phandle_args_equal(pargs, &args)) cpumask_set_cpu(cpu, cpumask); of_node_put(args.np); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 172d0a743e5d..7a5785f405b6 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -146,6 +146,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, + CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, @@ -184,6 +185,7 @@ enum cpuhp_state { CPUHP_AP_ARM64_ISNDEP_STARTING, CPUHP_AP_SMPCFD_DYING, CPUHP_AP_HRTIMERS_DYING, + CPUHP_AP_TICK_DYING, CPUHP_AP_X86_TBOOT_DYING, CPUHP_AP_ARM_CACHE_B15_RAC_DYING, CPUHP_AP_ONLINE, @@ -231,6 +233,7 @@ enum cpuhp_state { CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, CPUHP_AP_PERF_CSKY_ONLINE, + CPUHP_AP_TMIGR_ONLINE, CPUHP_AP_WATCHDOG_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cfb545841a2c..23686bed441d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -7,6 +7,7 @@ * set of CPUs in a system, one bit position per CPU number. In general, * only nr_cpu_ids (<= NR_CPUS) bits are valid. */ +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/threads.h> #include <linux/bitmap.h> @@ -187,6 +188,23 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask } /** + * cpumask_first_and_and - return the first cpu from *srcp1 & *srcp2 & *srcp3 + * @srcp1: the first input + * @srcp2: the second input + * @srcp3: the third input + * + * Return: >= nr_cpu_ids if no cpus set in all. + */ +static inline +unsigned int cpumask_first_and_and(const struct cpumask *srcp1, + const struct cpumask *srcp2, + const struct cpumask *srcp3) +{ + return find_first_and_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), + cpumask_bits(srcp3), small_cpumask_bits); +} + +/** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer * @@ -368,6 +386,16 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** + * for_each_cpu_from - iterate over CPUs present in @mask, from @cpu to the end of @mask. + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. 
+ */ +#define for_each_cpu_from(cpu, mask) \ + for_each_set_bit_from(cpu, cpumask_bits(mask), small_cpumask_bits) + +/** * cpumask_any_but - return a "random" cpu in a cpumask, but not this one. * @mask: the cpumask to search * @cpu: the cpu to ignore. @@ -388,6 +416,29 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** + * cpumask_any_and_but - pick a "random" cpu from *mask1 & *mask2, but not this one. + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * @cpu: the cpu to ignore + * + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline +unsigned int cpumask_any_and_but(const struct cpumask *mask1, + const struct cpumask *mask2, + unsigned int cpu) +{ + unsigned int i; + + cpumask_check(cpu); + i = cpumask_first_and(mask1, mask2); + if (i != cpu) + return i; + + return cpumask_next_and(cpu, mask1, mask2); +} + +/** * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 @@ -493,6 +544,22 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) } /** + * cpumask_assign_cpu - assign a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + * @value: the value to assign + */ +static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) +{ + assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); +} + +static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) +{ + __assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); +} + +/** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer @@ -720,6 +787,19 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, } /** + * cpumask_weight_andnot - Count of bits in (*srcp1 & ~*srcp2) + * @srcp1: the cpumask to count bits (< nr_cpu_ids) in. + * @srcp2: the cpumask to count bits (< nr_cpu_ids) in. + * + * Return: count of bits set in *srcp1 and cleared in *srcp2 + */ +static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + +/** * cpumask_shift_right - *dstp = *srcp >> n * @dstp: the cpumask result * @srcp: the input to shift @@ -839,7 +919,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) */ static inline unsigned int cpumask_size(void) { - return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); + return bitmap_size(large_cpumask_bits); } /* @@ -977,6 +1057,8 @@ static inline bool cpumask_available(cpumask_var_t mask) } #endif /* CONFIG_CPUMASK_OFFSTACK */ +DEFINE_FREE(free_cpumask_var, struct cpumask *, if (_T) free_cpumask_var(_T)); + /* It's common to want to use cpu_all_mask in struct member initializers, * so it has to refer to an address rather than a pointer. 
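Pulling the new cpumask helpers together, a sketch (demo function name and mask roles are invented) of counting with cpumask_weight_andnot() and resuming iteration with for_each_cpu_from():

	static void demo_walk(const struct cpumask *online,
			      const struct cpumask *isolated)
	{
		/* CPUs that are online but not isolated */
		unsigned int nr = cpumask_weight_andnot(online, isolated);
		unsigned int cpu = cpumask_first_and(online, isolated);

		pr_info("%u non-isolated online CPUs\n", nr);

		/* resume from 'cpu' and walk to the end of 'online' */
		for_each_cpu_from(cpu, online)
			pr_info("visiting cpu%u\n", cpu);
	}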
*/ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); @@ -1001,11 +1083,6 @@ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); -static inline void reset_cpu_possible_mask(void) -{ - bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS); -} - static inline void set_cpu_possible(unsigned int cpu, bool possible) { diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 875d12598bd2..de4cf0ee96f7 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -70,7 +70,6 @@ extern int cpuset_init(void); extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); -extern void cpuset_wait_for_hotplug(void); extern void inc_dl_tasks_cs(struct task_struct *task); extern void dec_dl_tasks_cs(struct task_struct *task); extern void cpuset_lock(void); @@ -121,11 +120,6 @@ static inline int cpuset_do_page_mem_spread(void) return task_spread_page(current); } -static inline int cpuset_do_slab_mem_spread(void) -{ - return task_spread_slab(current); -} - extern bool current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -190,8 +184,6 @@ static inline void cpuset_update_active_cpus(void) partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_wait_for_hotplug(void) { } - static inline void inc_dl_tasks_cs(struct task_struct *task) { } static inline void dec_dl_tasks_cs(struct task_struct *task) { } static inline void cpuset_lock(void) { } @@ -264,11 +256,6 @@ static inline int cpuset_do_page_mem_spread(void) return 0; } -static inline int cpuset_do_slab_mem_spread(void) -{ - return 0; -} - static inline bool current_cpuset_is_being_rebound(void) { return false; diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 9eaeaafe0cad..44305336314e 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -5,129 +5,57 @@ #include <linux/linkage.h> #include <linux/elfcore.h> #include <linux/elf.h> -#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION -#include <asm/crash_core.h> -#endif -/* Location of a reserved region to hold the crash kernel. - */ -extern struct resource crashk_res; -extern struct resource crashk_low_res; +struct kimage; -#define CRASH_CORE_NOTE_NAME "CORE" -#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) -#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) +struct crash_mem { + unsigned int max_nr_ranges; + unsigned int nr_ranges; + struct range ranges[] __counted_by(max_nr_ranges); +}; + +#ifdef CONFIG_CRASH_DUMP + +int crash_shrink_memory(unsigned long new_size); +ssize_t crash_get_memory_size(void); +#ifndef arch_kexec_protect_crashkres /* - * The per-cpu notes area is a list of notes terminated by a "NULL" - * note header. For kdump, the code in vmcore.c runs in the context - * of the second kernel to combine them into one note. + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. 
+ * + * Provide an empty default implementation here -- architecture + * code may override this */ -#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - CRASH_CORE_NOTE_NAME_BYTES + \ - CRASH_CORE_NOTE_DESC_BYTES) - -#define VMCOREINFO_BYTES PAGE_SIZE -#define VMCOREINFO_NOTE_NAME "VMCOREINFO" -#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) -#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ - VMCOREINFO_NOTE_NAME_BYTES + \ - VMCOREINFO_BYTES) - -typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; -/* Per cpu memory for storing cpu states in case of system crash. */ -extern note_buf_t __percpu *crash_notes; - -void crash_update_vmcoreinfo_safecopy(void *ptr); -void crash_save_vmcoreinfo(void); -void arch_crash_save_vmcoreinfo(void); -__printf(1, 2) -void vmcoreinfo_append_str(const char *fmt, ...); -phys_addr_t paddr_vmcoreinfo_note(void); - -#define VMCOREINFO_OSRELEASE(value) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_BUILD_ID() \ - ({ \ - static_assert(sizeof(vmlinux_build_id) == 20); \ - vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ - }) - -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SYMBOL_ARRAY(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) -#define VMCOREINFO_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(name)) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_TYPE_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(name, field)) -#define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) -#define VMCOREINFO_NUMBER(name) \ - vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) -#define VMCOREINFO_CONFIG(name) \ - vmcoreinfo_append_str("CONFIG_%s=y\n", #name) - -extern unsigned char *vmcoreinfo_data; -extern size_t vmcoreinfo_size; -extern u32 *vmcoreinfo_note; - -Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, - void *data, size_t data_len); -void final_note(Elf_Word *buf); - -int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base, - unsigned long long *low_size, bool *high); - -#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION -#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE -#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +static inline void arch_kexec_protect_crashkres(void) { } #endif -#ifndef CRASH_ALIGN -#define CRASH_ALIGN SZ_2M + +#ifndef arch_kexec_unprotect_crashkres +static inline void arch_kexec_unprotect_crashkres(void) { } #endif -#ifndef CRASH_ADDR_LOW_MAX -#define CRASH_ADDR_LOW_MAX SZ_4G + + + +#ifndef arch_crash_handle_hotplug_event +static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } #endif -#ifndef CRASH_ADDR_HIGH_MAX -#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() + +int crash_check_hotplug_support(void); + +#ifndef arch_crash_hotplug_support +static inline int arch_crash_hotplug_support(struct kimage 
*image, unsigned long kexec_flags) +{ + return 0; +} #endif -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high); -#else -static inline void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high) -{} +#ifndef crash_get_elfcorehdr_size +static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; } #endif /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -struct crash_mem { - unsigned int max_nr_ranges; - unsigned int nr_ranges; - struct range ranges[] __counted_by(max_nr_ranges); -}; - extern int crash_exclude_mem_range(struct crash_mem *mem, unsigned long long mstart, unsigned long long mend); @@ -144,4 +72,23 @@ struct kexec_segment; #define KEXEC_CRASH_HP_REMOVE_MEMORY 4 #define KEXEC_CRASH_HP_INVALID_CPU -1U +extern void __crash_kexec(struct pt_regs *regs); +extern void crash_kexec(struct pt_regs *regs); +int kexec_should_crash(struct task_struct *p); +int kexec_crash_loaded(void); +void crash_save_cpu(struct pt_regs *regs, int cpu); +extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); + +#else /* !CONFIG_CRASH_DUMP*/ +struct pt_regs; +struct task_struct; +struct kimage; +static inline void __crash_kexec(struct pt_regs *regs) { } +static inline void crash_kexec(struct pt_regs *regs) { } +static inline int kexec_should_crash(struct task_struct *p) { return 0; } +static inline int kexec_crash_loaded(void) { return 0; } +static inline void crash_save_cpu(struct pt_regs *regs, int cpu) {}; +static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }; +#endif /* CONFIG_CRASH_DUMP*/ + #endif /* LINUX_CRASH_CORE_H */ diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h new file mode 100644 index 000000000000..5a9df944fb80 --- /dev/null +++ b/include/linux/crash_reserve.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_CRASH_RESERVE_H +#define LINUX_CRASH_RESERVE_H + +#include <linux/linkage.h> +#include <linux/elfcore.h> +#include <linux/elf.h> +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#include <asm/crash_reserve.h> +#endif + +/* Location of a reserved region to hold the crash kernel. 
+ */ +extern struct resource crashk_res; +extern struct resource crashk_low_res; + +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base, + unsigned long long *low_size, bool *high); + +#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION +#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE +#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +#endif +#ifndef CRASH_ALIGN +#define CRASH_ALIGN SZ_2M +#endif +#ifndef CRASH_ADDR_LOW_MAX +#define CRASH_ADDR_LOW_MAX SZ_4G +#endif +#ifndef CRASH_ADDR_HIGH_MAX +#define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() +#endif + +void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); +#else +static inline void __init reserve_crashkernel_generic(char *cmdline, + unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high) +{} +#endif +#endif /* LINUX_CRASH_RESERVE_H */ diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 03fa6d50d46f..60b25020281f 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -3,6 +3,10 @@ #ifndef _LINUX_CXL_EVENT_H #define _LINUX_CXL_EVENT_H +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/workqueue_types.h> + /* * Common Event Record Format * CXL rev 3.0 section 8.2.9.2.1; Table 8-42 @@ -91,11 +95,21 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; +/* + * General Media or DRAM Event Common Fields + * - provides common access to phys_addr + */ +struct cxl_event_common { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; +} __packed; + union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; + struct cxl_event_common common; } __packed; /* @@ -140,4 +154,29 @@ struct cxl_cper_event_rec { union cxl_event event; } __packed; +struct cxl_cper_work_data { + enum cxl_event_type event_type; + struct cxl_cper_event_rec rec; +}; + +#ifdef CONFIG_ACPI_APEI_GHES +int cxl_cper_register_work(struct work_struct *work); +int cxl_cper_unregister_work(struct work_struct *work); +int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd); +#else +static inline int cxl_cper_register_work(struct work_struct *work) +{ + return 0; +} + +static inline int cxl_cper_unregister_work(struct work_struct *work) +{ + return 0; +} +static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd) +{ + return 0; +} +#endif + #endif /* _LINUX_CXL_EVENT_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index 5881e4ac30be..f7da65e1ac04 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -127,18 +127,60 @@ enum damos_action { }; /** + * enum damos_quota_goal_metric - Represents the metric to be used as the goal + * + * @DAMOS_QUOTA_USER_INPUT: User-input value. + * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. + * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. + * + * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. + */ +enum damos_quota_goal_metric { + DAMOS_QUOTA_USER_INPUT, + DAMOS_QUOTA_SOME_MEM_PSI_US, + NR_DAMOS_QUOTA_GOAL_METRICS, +}; + +/** + * struct damos_quota_goal - DAMOS scheme quota auto-tuning goal. + * @metric: Metric to be used for representing the goal. + * @target_value: Target value of @metric to achieve with the tuning. 
+ * @current_value: Current value of @metric. + * @last_psi_total: Last measured total PSI + * @list: List head for siblings. + * + * Data structure for getting the current score of the quota tuning goal. The + * score is calculated by how close @current_value and @target_value are. Then + * the score is entered into DAMON's internal feedback loop mechanism to get the + * auto-tuned quota. + * + * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually + * entered by the user, probably inside the kdamond callbacks. Otherwise, + * DAMON sets @current_value with the self-measured value of @metric. + */ +struct damos_quota_goal { + enum damos_quota_goal_metric metric; + unsigned long target_value; + unsigned long current_value; + /* metric-dependent fields */ + union { + u64 last_psi_total; + }; + struct list_head list; +}; + +/** * struct damos_quota - Controls the aggressiveness of the given scheme. + * @reset_interval: Charge reset interval in milliseconds. * @ms: Maximum milliseconds that the scheme can use. * @sz: Maximum bytes of memory that the action can be applied. - * @reset_interval: Charge reset interval in milliseconds. + * @goals: Head of quota tuning goals (&damos_quota_goal) list. + * @esz: Effective size quota in bytes. * * @weight_sz: Weight of the region's size for prioritization. * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. * @weight_age: Weight of the region's age for prioritization. * - * @get_score: Feedback function for self-tuning quota. - * @get_score_arg: Parameter for @get_score - * * To avoid consuming too much CPU time or IO resources for applying the * &struct damos->action to large memory, DAMON allows users to set time and/or * size quotas. The quotas can be set by writing non-zero values to &ms and @@ -151,42 +193,35 @@ enum damos_action { * throughput of the scheme's action. DAMON then compares it against &sz and * uses smaller one as the effective quota. * + * If @goals is not empty, DAMON calculates yet another size quota based on the + * goals using its internal feedback loop algorithm, for every @reset_interval. + * Then, if the new size quota is smaller than the effective quota, it uses the + * new size quota as the effective quota. + * + * The resulting effective size quota in bytes is set to @esz. + * * For selecting regions within the quota, DAMON prioritizes current scheme's * target memory regions using the &struct damon_operations->get_scheme_score. * You could customize the prioritization logic by setting &weight_sz, * &weight_nr_accesses, and &weight_age, because monitoring operations are * encouraged to respect those. - * - * If @get_score function pointer is set, DAMON calls it back with - * @get_score_arg and get the return value of it for every @reset_interval. - * Then, DAMON adjusts the effective quota using the return value as a feedback - * score to the current quota, using its internal feedback loop algorithm. - * - * The feedback loop algorithem assumes the quota input and the feedback score - * output are in a positive proportional relationship, and the goal of the - * tuning is getting the feedback screo value of 10,000. If @ms and/or @sz are - * set together, those work as a hard limit quota. If neither @ms nor @sz are - * set, the mechanism starts from the quota of one byte. 
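Under the new scheme, a module could attach a PSI-driven goal to a quota roughly as follows (sketch only; it uses damos_new_quota_goal()/damos_add_quota_goal() declared later in this header, and assumes the constructor returns NULL on allocation failure):

	static int demo_add_psi_goal(struct damos_quota *quota)
	{
		struct damos_quota_goal *goal;

		/* aim for ~10ms of system-level "some" memory PSI stall */
		goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, 10000);
		if (!goal)
			return -ENOMEM;

		damos_add_quota_goal(quota, goal);
		return 0;
	}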
*/ struct damos_quota { + unsigned long reset_interval; unsigned long ms; unsigned long sz; - unsigned long reset_interval; + struct list_head goals; + unsigned long esz; unsigned int weight_sz; unsigned int weight_nr_accesses; unsigned int weight_age; - unsigned long (*get_score)(void *arg); - void *get_score_arg; - /* private: */ /* For throughput estimation */ unsigned long total_charged_sz; unsigned long total_charged_ns; - unsigned long esz; /* Effective size quota in bytes */ - /* For charging the quota */ unsigned long charged_sz; unsigned long charged_from; @@ -262,6 +297,7 @@ struct damos_stat { * enum damos_filter_type - Type of memory for &struct damos_filter * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. + * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. * @DAMOS_FILTER_TYPE_ADDR: Address range. * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. @@ -280,6 +316,7 @@ struct damos_stat { enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, DAMOS_FILTER_TYPE_MEMCG, + DAMOS_FILTER_TYPE_YOUNG, DAMOS_FILTER_TYPE_ADDR, DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, @@ -640,6 +677,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damon_for_each_scheme_safe(s, next, ctx) \ list_for_each_entry_safe(s, next, &(ctx)->schemes, list) +#define damos_for_each_quota_goal(goal, quota) \ + list_for_each_entry(goal, "a->goals, list) + +#define damos_for_each_quota_goal_safe(goal, next, quota) \ + list_for_each_entry_safe(goal, next, &(quota)->goals, list) + #define damos_for_each_filter(f, scheme) \ list_for_each_entry(f, &(scheme)->filters, list) @@ -673,6 +716,12 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type, void damos_add_filter(struct damos *s, struct damos_filter *f); void damos_destroy_filter(struct damos_filter *f); +struct damos_quota_goal *damos_new_quota_goal( + enum damos_quota_goal_metric metric, + unsigned long target_value); +void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g); +void damos_destroy_quota_goal(struct damos_quota_goal *goal); + struct damos *damon_new_scheme(struct damos_access_pattern *pattern, enum damos_action action, unsigned long apply_interval_us, diff --git a/include/linux/dax.h b/include/linux/dax.h index b463502b16e1..9d3e3327af4c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -63,6 +63,8 @@ void kill_dax(struct dax_device *dax_dev); void dax_write_cache(struct dax_device *dax_dev, bool wc); bool dax_write_cache_enabled(struct dax_device *dax_dev); bool dax_synchronous(struct dax_device *dax_dev); +void set_dax_nocache(struct dax_device *dax_dev); +void set_dax_nomc(struct dax_device *dax_dev); void set_dax_synchronous(struct dax_device *dax_dev); size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); @@ -86,11 +88,7 @@ static inline void *dax_holder(struct dax_device *dax_dev) static inline struct dax_device *alloc_dax(void *private, const struct dax_operations *ops) { - /* - * Callers should check IS_ENABLED(CONFIG_DAX) to know if this - * NULL is an error or expected. 
- */ - return NULL; + return ERR_PTR(-EOPNOTSUPP); } static inline void put_dax(struct dax_device *dax_dev) { @@ -109,6 +107,12 @@ static inline bool dax_synchronous(struct dax_device *dax_dev) { return true; } +static inline void set_dax_nocache(struct dax_device *dax_dev) +{ +} +static inline void set_dax_nomc(struct dax_device *dax_dev) +{ +} static inline void set_dax_synchronous(struct dax_device *dax_dev) { } @@ -124,9 +128,6 @@ static inline size_t dax_recovery_write(struct dax_device *dax_dev, } #endif -void set_dax_nocache(struct dax_device *dax_dev); -void set_dax_nomc(struct dax_device *dax_dev); - struct writeback_control; #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX) int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d07cf2f1bb7d..bf53e3894aae 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -125,6 +125,11 @@ enum dentry_d_lock_class DENTRY_D_LOCK_NESTED }; +enum d_real_type { + D_REAL_DATA, + D_REAL_METADATA, +}; + struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); @@ -139,7 +144,7 @@ struct dentry_operations { char *(*d_dname)(struct dentry *, char *, int); struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); - struct dentry *(*d_real)(struct dentry *, const struct inode *); + struct dentry *(*d_real)(struct dentry *, enum d_real_type type); } ____cacheline_aligned; /* @@ -547,24 +552,23 @@ static inline struct inode *d_backing_inode(const struct dentry *upper) /** * d_real - Return the real dentry * @dentry: the dentry to query - * @inode: inode to select the dentry from multiple layers (can be NULL) + * @type: the type of real dentry (data or metadata) * * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.rst */ -static inline struct dentry *d_real(struct dentry *dentry, - const struct inode *inode) +static inline struct dentry *d_real(struct dentry *dentry, enum d_real_type type) { if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) - return dentry->d_op->d_real(dentry, inode); + return dentry->d_op->d_real(dentry, type); else return dentry; } /** - * d_real_inode - Return the real inode + * d_real_inode - Return the real inode hosting the data * @dentry: The dentry to query * * If dentry is on a union/overlay, then return the underlying, real inode. @@ -573,7 +577,7 @@ static inline struct dentry *d_real(struct dentry *dentry, static inline struct inode *d_real_inode(const struct dentry *dentry) { /* This usage of d_real() results in const dentry */ - return d_backing_inode(d_real((struct dentry *) dentry, NULL)); + return d_inode(d_real((struct dentry *) dentry, D_REAL_DATA)); } struct name_snapshot { diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index 6bfe70decc9f..ae80a303c216 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -130,6 +130,16 @@ void _dev_info(const struct device *dev, const char *fmt, ...) }) /* + * Dummy dev_printk for disabled debugging statements to use whilst maintaining + * gcc's format checking. + */ +#define dev_no_printk(level, dev, fmt, ...) \ + ({ \ + if (0) \ + _dev_printk(level, dev, fmt, ##__VA_ARGS__); \ + }) + +/* * #defines for all the dev_<level> macros to prefix with whatever * possible use of #define dev_fmt(fmt) ... 
*/ @@ -158,10 +168,7 @@ void _dev_info(const struct device *dev, const char *fmt, ...) dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #else #define dev_dbg(dev, fmt, ...) \ -({ \ - if (0) \ - dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \ -}) + dev_no_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #endif #ifdef CONFIG_PRINTK @@ -247,20 +254,14 @@ do { \ } while (0) #else #define dev_dbg_ratelimited(dev, fmt, ...) \ -do { \ - if (0) \ - dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \ -} while (0) + dev_no_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #endif #ifdef VERBOSE_DEBUG #define dev_vdbg dev_dbg #else #define dev_vdbg(dev, fmt, ...) \ -({ \ - if (0) \ - dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \ -}) + dev_no_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__) #endif /* diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c008169ed2c6..c8f7eb6cc191 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -63,6 +63,8 @@ void dev_coredumpm(struct device *dev, struct module *owner, void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); + +void dev_coredump_put(struct device *dev); #else static inline void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp) @@ -85,6 +87,9 @@ static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table, { _devcd_free_sgtable(table); } +static inline void dev_coredump_put(struct device *dev) +{ +} #endif /* CONFIG_DEV_COREDUMP */ #endif /* __DEVCOREDUMP_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 772ab4d74d94..82b2195efaca 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -165,7 +165,7 @@ void dm_error(const char *message); struct dm_dev { struct block_device *bdev; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct dax_device *dax_dev; blk_mode_t mode; char name[16]; diff --git a/include/linux/device.h b/include/linux/device.h index 97c4b046c09d..fc3bd7116ab9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -132,6 +132,8 @@ ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); +ssize_t device_show_string(struct device *dev, struct device_attribute *attr, + char *buf); /** * DEVICE_ATTR - Define a device attribute. @@ -251,6 +253,19 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_bool, device_store_bool), &(_var) } +/** + * DEVICE_STRING_ATTR_RO - Define a device attribute backed by a r/o string. + * @_name: Attribute name. + * @_mode: File mode. + * @_var: Identifier of string. + * + * Like DEVICE_ULONG_ATTR(), but @_var is a string. Because the length of the + * string allocation is unknown, the attribute must be read-only. + */ +#define DEVICE_STRING_ATTR_RO(_name, _mode, _var) \ + struct dev_ext_attribute dev_attr_##_name = \ + { __ATTR(_name, (_mode) & ~0222, device_show_string, NULL), (_var) } + #define DEVICE_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) @@ -691,6 +706,7 @@ struct device_physical_location { * and optionall (if the coherent mask is large enough) also * for dma allocations. 
This flag is managed by the dma ops * instance from ->dma_supported. + * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -803,6 +819,9 @@ struct device { #ifdef CONFIG_DMA_OPS_BYPASS bool dma_ops_bypass : 1; #endif +#ifdef CONFIG_DMA_NEED_SYNC + bool dma_skip_sync:1; +#endif }; /** @@ -1207,17 +1226,6 @@ int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); /* - * Platform "fixup" functions - allow the platform to have their say - * about devices and actions that the general device layer doesn't - * know about. - */ -/* Notify platform of device discovery */ -extern int (*platform_notify)(struct device *dev); - -extern int (*platform_notify_remove)(struct device *dev); - - -/* * get_device - atomically increment the reference count for the device. * */ @@ -1247,6 +1255,7 @@ void device_link_del(struct device_link *link); void device_link_remove(void *consumer, struct device *supplier); void device_links_supplier_sync_state_pause(void); void device_links_supplier_sync_state_resume(void); +void device_link_wait_removal(void); /* Create alias, so I can be autoloaded. */ #define MODULE_ALIAS_CHARDEV(major,minor) \ diff --git a/include/linux/devm-helpers.h b/include/linux/devm-helpers.h index 74891802200d..708ca9131402 100644 --- a/include/linux/devm-helpers.h +++ b/include/linux/devm-helpers.h @@ -41,7 +41,7 @@ static inline void devm_delayed_work_drop(void *res) * detached. A few drivers need delayed work which must be cancelled before * driver is detached to avoid accessing removed resources. * devm_delayed_work_autocancel() can be used to omit the explicit - * cancelleation when driver is detached. + * cancellation when driver is detached. */ static inline int devm_delayed_work_autocancel(struct device *dev, struct delayed_work *w, @@ -66,7 +66,7 @@ static inline void devm_work_drop(void *res) * A few drivers need to queue work which must be cancelled before driver * is detached to avoid accessing removed resources. * devm_work_autocancel() can be used to omit the explicit - * cancelleation when driver is detached. + * cancellation when driver is detached. */ static inline int devm_work_autocancel(struct device *dev, struct work_struct *w, diff --git a/include/linux/dio.h b/include/linux/dio.h index 5abd07361eb5..2b5923909f96 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -68,7 +68,7 @@ struct dio_bus { }; extern struct dio_bus dio_bus; /* Single DIO bus */ -extern struct bus_type dio_bus_type; +extern const struct bus_type dio_bus_type; /* * DIO device IDs diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 75e7d8cbb532..d1503b815a78 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -64,6 +64,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start); void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp); +void *dm_bufio_read_with_ioprio(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp, unsigned short ioprio); + /* * Like dm_bufio_read, but return buffer from cache, don't read * it. If the buffer is not in the cache, return NULL. 
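Editorial aside: the new dm_bufio_read_with_ioprio() variant lets a device-mapper target propagate an explicit I/O priority rather than inheriting the submitter's default. A minimal caller sketch, not taken from this patch (the function name example_read_meta() is illustrative):

	#include <linux/dm-bufio.h>
	#include <linux/err.h>
	#include <linux/ioprio.h>

	static int example_read_meta(struct dm_bufio_client *c, sector_t block)
	{
		struct dm_buffer *bp;
		void *data;

		/* Read one block at best-effort class, highest level within the class. */
		data = dm_bufio_read_with_ioprio(c, block, &bp,
						 IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0));
		if (IS_ERR(data))
			return PTR_ERR(data);
		/* ... use the block contents ... */
		dm_bufio_release(bp);
		return 0;
	}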
@@ -86,6 +89,10 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks); +void dm_bufio_prefetch_with_ioprio(struct dm_bufio_client *c, + sector_t block, unsigned int n_blocks, + unsigned short ioprio); + /* * Release a reference obtained with dm_bufio_{read,get,new}. The data * pointer and dm_buffer pointer is no longer valid after this call. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 7595142f3fc5..7b2968612b7e 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -80,7 +80,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned int num_regions, - struct dm_io_region *region, unsigned int long *sync_error_bits); + struct dm_io_region *region, unsigned int long *sync_error_bits, + unsigned short ioprio); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 8ff4add71f88..36216d28d8bd 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -370,8 +370,10 @@ struct dma_buf { */ struct module *owner; +#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; +#endif /** @priv: exporter specific private data for this buffer object. */ void *priv; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 3eb3589ff43e..edbe13d00776 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -54,6 +54,24 @@ static inline phys_addr_t translate_dma_to_phys(struct device *dev, return (phys_addr_t)-1; } +static inline dma_addr_t dma_range_map_min(const struct bus_dma_region *map) +{ + dma_addr_t ret = (dma_addr_t)U64_MAX; + + for (; map->size; map++) + ret = min(ret, map->dma_start); + return ret; +} + +static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map) +{ + dma_addr_t ret = 0; + + for (; map->size; map++) + ret = max(ret, map->dma_start + map->size - 1); + return ret; +} + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include <asm/dma-direct.h> #ifndef phys_to_dma_unencrypted diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 4bdf0b96da28..ad9e2506c2f4 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -86,10 +86,8 @@ dma_fence_chain_contained(struct dma_fence *fence) * * Returns a new struct dma_fence_chain object or NULL on failure. */ -static inline struct dma_fence_chain *dma_fence_chain_alloc(void) -{ - return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); -}; +#define dma_fence_chain_alloc() \ + ((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL)) /** * dma_fence_chain_free diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4abc60f04209..02a1c825896b 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -18,8 +18,11 @@ struct iommu_ops; * * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. + * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is + * coherent and it's not an SWIOTLB buffer. 
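Editorial aside: a sketch of what DMA_F_CAN_SKIP_SYNC buys a driver hotpath, under the assumption of a hypothetical ring structure with a cached needs_sync field (nothing here is from the patch itself). dma_need_sync() is consulted once per mapping, and when the core sets dma_skip_sync even an unconditional dma_sync_*() call reduces to a single flag test:

	#include <linux/dma-mapping.h>

	struct example_ring {
		dma_addr_t dma_addr;
		bool needs_sync;	/* hypothetical cached field */
	};

	static void example_ring_setup(struct device *dev, struct example_ring *r)
	{
		/* Decide once per mapping; false means syncs may be elided. */
		r->needs_sync = dma_need_sync(dev, r->dma_addr);
	}

	static void example_rx_poll(struct device *dev, struct example_ring *r,
				    size_t len)
	{
		if (r->needs_sync)
			dma_sync_single_for_cpu(dev, r->dma_addr, len,
						DMA_FROM_DEVICE);
	}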
*/ #define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) +#define DMA_F_CAN_SKIP_SYNC (1 << 1) struct dma_map_ops { unsigned int flags; @@ -29,7 +32,7 @@ struct dma_map_ops { unsigned long attrs); void (*free)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); - struct page *(*alloc_pages)(struct device *dev, size_t size, + struct page *(*alloc_pages_op)(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, @@ -273,6 +276,15 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ +static inline void dma_reset_need_sync(struct device *dev) +{ +#ifdef CONFIG_DMA_NEED_SYNC + /* Reset it only once so that the function can be called on hotpath */ + if (unlikely(dev->dma_skip_sync)) + dev->dma_skip_sync = false; +#endif +} + /* * Check whether potential kmalloc() buffers are safe for non-coherent DMA. */ @@ -426,11 +438,9 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent); +void arch_setup_dma_ops(struct device *dev, bool coherent); #else -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, bool coherent) +static inline void arch_setup_dma_ops(struct device *dev, bool coherent) { } #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4a658de44ee9..f693aafe221f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -117,14 +117,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, - enum dma_data_direction dir); -void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir); -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir); void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, @@ -147,7 +139,6 @@ u64 dma_get_required_mask(struct device *dev); bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); -bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); @@ -195,22 +186,6 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ -} -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ -} -static inline void 
dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} -static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, enum dma_data_direction dir) -{ -} static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return -ENOMEM; @@ -277,10 +252,6 @@ static inline size_t dma_opt_mapping_size(struct device *dev) { return 0; } -static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) -{ - return false; -} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; @@ -310,6 +281,82 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) +void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir); +void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir); +void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir); +bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr); + +static inline bool dma_dev_need_sync(const struct device *dev) +{ + /* Always call DMA sync operations when debugging is enabled */ + return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG); +} + +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_single_for_cpu(dev, addr, size, dir); +} + +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_single_for_device(dev, addr, size, dir); +} + +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_sg_for_cpu(dev, sg, nelems, dir); +} + +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ + if (dma_dev_need_sync(dev)) + __dma_sync_sg_for_device(dev, sg, nelems, dir); +} + +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return dma_dev_need_sync(dev) ? 
__dma_need_sync(dev, dma_addr) : false; +} +#else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ +static inline bool dma_dev_need_sync(const struct device *dev) +{ + return false; +} +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ +} +static inline void dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} +static inline void dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction dir) +{ +} +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} +#endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ + struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, diff --git a/include/linux/dma/imx-dma.h b/include/linux/dma/imx-dma.h index cfec5f946e23..76a8de9ae151 100644 --- a/include/linux/dma/imx-dma.h +++ b/include/linux/dma/imx-dma.h @@ -41,6 +41,7 @@ enum sdma_peripheral_type { IMX_DMATYPE_SAI, /* SAI */ IMX_DMATYPE_MULTI_SAI, /* MULTI FIFOs For Audio */ IMX_DMATYPE_HDMI, /* HDMI Audio */ + IMX_DMATYPE_I2C, /* I2C */ }; enum imx_dma_prio { diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index e443be4d3b4b..1e491c5dcac2 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -26,6 +26,11 @@ struct k3_udma_glue_tx_channel; struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev, const char *name, struct k3_udma_glue_tx_channel_cfg *cfg); +struct k3_udma_glue_tx_channel * +k3_udma_glue_request_tx_chn_for_thread_id(struct device *dev, + struct k3_udma_glue_tx_channel_cfg *cfg, + struct device_node *udmax_np, u32 thread_id); + void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn); int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn, struct cppi5_host_desc_t *desc_tx, @@ -109,6 +114,11 @@ struct k3_udma_glue_rx_channel *k3_udma_glue_request_rx_chn( const char *name, struct k3_udma_glue_rx_channel_cfg *cfg); +struct k3_udma_glue_rx_channel * +k3_udma_glue_request_remote_rx_chn_for_thread_id(struct device *dev, + struct k3_udma_glue_rx_channel_cfg *cfg, + struct device_node *udmax_np, u32 thread_id); + void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e34b601b71fd..499bb2c63483 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -117,7 +117,7 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, int count); /* Intel IOMMU detection */ void detect_intel_iommu(void); -extern int enable_drhd_fault_handling(void); +extern int enable_drhd_fault_handling(unsigned int cpu); extern int dmar_device_add(acpi_handle handle); extern int dmar_device_remove(acpi_handle handle); diff --git a/include/linux/dpll.h b/include/linux/dpll.h index e37344f6a231..d275736230b3 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -21,6 +21,7 @@ struct dpll_device_ops { enum dpll_mode *mode, struct netlink_ext_ack *extack); int 
(*lock_status_get)(const struct dpll_device *dpll, void *dpll_priv, enum dpll_lock_status *status, + enum dpll_lock_status_error *status_error, struct netlink_ext_ack *extack); int (*temp_get)(const struct dpll_device *dpll, void *dpll_priv, s32 *temp, struct netlink_ext_ack *extack); diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 4fcbf4d4fd0a..ff44ec346162 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -305,9 +305,9 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor, #define DYNAMIC_DEBUG_BRANCH(descriptor) false #define dynamic_pr_debug(fmt, ...) \ - do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #define dynamic_dev_dbg(dev, fmt, ...) \ - do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0) + dev_no_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__) #define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \ groupsize, buf, len, ascii) \ do { if (0) \ diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 407c2f281b64..281298e77a15 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -38,14 +38,25 @@ #ifdef __KERNEL__ +#include <linux/bitops.h> #include <asm/bug.h> +#define DQL_HIST_LEN 4 +#define DQL_HIST_ENT(dql, idx) ((dql)->history[(idx) % DQL_HIST_LEN]) + struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + + unsigned long history_head; /* top 58 bits of jiffies */ + /* stall entries, a bit per entry */ + unsigned long history[DQL_HIST_LEN]; + /* Fields accessed only by completion path (dql_completed) */ unsigned int limit ____cacheline_aligned_in_smp; /* Current limit */ @@ -62,19 +73,59 @@ struct dql { unsigned int max_limit; /* Max limit */ unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ + + /* Longest stall detected, reported to user */ + unsigned short stall_max; + unsigned long last_reap; /* Last reap (in jiffies) */ + unsigned long stall_cnt; /* Number of stalls */ }; /* Set some static maximums */ #define DQL_MAX_OBJECT (UINT_MAX / 16) #define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT) +/* Populate the bitmap to be processed later in dql_check_stall() */ +static inline void dql_queue_stall(struct dql *dql) +{ + unsigned long map, now, now_hi, i; + + now = jiffies; + now_hi = now / BITS_PER_LONG; + + /* The following code sets a bit in the ring buffer, where each + * bit tracks the time the packet was queued.
The dql->history buffer + * tracks DQL_HIST_LEN * BITS_PER_LONG time (jiffies) slot + */ + if (unlikely(now_hi != dql->history_head)) { + /* About to reuse slots, clear them */ + for (i = 0; i < DQL_HIST_LEN; i++) { + /* Multiplication masks high bits */ + if (now_hi * BITS_PER_LONG == + (dql->history_head + i) * BITS_PER_LONG) + break; + DQL_HIST_ENT(dql, dql->history_head + i + 1) = 0; + } + /* pairs with smp_rmb() in dql_check_stall() */ + smp_wmb(); + WRITE_ONCE(dql->history_head, now_hi); + } + + /* __set_bit() does not guarantee WRITE_ONCE() semantics */ + map = DQL_HIST_ENT(dql, now_hi); + + /* Populate the history with an entry (bit) per queued */ + if (!(map & BIT_MASK(now))) + WRITE_ONCE(DQL_HIST_ENT(dql, now_hi), map | BIT_MASK(now)); +} + /* * Record number of objects queued. Assumes that caller has already checked * availability in the queue with dql_avail. */ static inline void dql_queued(struct dql *dql, unsigned int count) { - BUG_ON(count > DQL_MAX_OBJECT); + if (WARN_ON_ONCE(count > DQL_MAX_OBJECT)) + return; dql->last_obj_cnt = count; @@ -86,6 +137,10 @@ static inline void dql_queued(struct dql *dql, unsigned int count) barrier(); dql->num_queued += count; + + /* Only populate stall information if the threshold is set */ + if (READ_ONCE(dql->stall_thrs)) + dql_queue_stall(dql); } /* Returns how many objects can be queued, < 0 indicates over limit. */ diff --git a/include/linux/efi.h b/include/linux/efi.h index c74f47711f0b..418e555459da 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -386,6 +386,7 @@ void efi_native_runtime_setup(void); #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) +#define EFI_TCG2_FINAL_EVENTS_TABLE_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) @@ -400,6 +401,8 @@ void efi_native_runtime_setup(void); #define EFI_CERT_X509_GUID EFI_GUID(0xa5c059a1, 0x94e4, 0x4aa7, 0x87, 0xb5, 0xab, 0x15, 0x5c, 0x2b, 0xf0, 0x72) #define EFI_CERT_X509_SHA256_GUID EFI_GUID(0x3bd2a492, 0x96c0, 0x4079, 0xb4, 0x20, 0xfc, 0xf9, 0x8e, 0xf1, 0x03, 0xed) #define EFI_CC_BLOB_GUID EFI_GUID(0x067b1f5f, 0xcf26, 0x44c5, 0x85, 0x54, 0x93, 0xd7, 0x77, 0x91, 0x2d, 0x42) +#define EFI_CC_MEASUREMENT_PROTOCOL_GUID EFI_GUID(0x96751a3d, 0x72f4, 0x41a6, 0xa7, 0x94, 0xed, 0x5d, 0x0e, 0x67, 0xae, 0x6b) +#define EFI_CC_FINAL_EVENTS_TABLE_GUID EFI_GUID(0xdd4a4648, 0x2de7, 0x4665, 0x96, 0x4d, 0x21, 0xd9, 0xef, 0x5f, 0xb4, 0x46) /* * This GUID is used to pass to the kernel proper the struct screen_info @@ -411,7 +414,6 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 
0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) -#define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) @@ -692,6 +694,11 @@ extern struct efi { extern struct mm_struct efi_mm; +static inline bool mm_is_efi(struct mm_struct *mm) +{ + return IS_ENABLED(CONFIG_EFI) && mm == &efi_mm; +} + static inline int efi_guidcmp (efi_guid_t left, efi_guid_t right) { @@ -1065,12 +1072,11 @@ static inline u64 efivar_reserved_space(void) { return 0; } #endif /* - * The maximum size of VariableName + Data = 1024 - * Therefore, it's reasonable to save that much - * space in each part of the structure, - * and we use a page for reading/writing. + * There is no actual upper limit specified for the variable name size. + * + * This limit exists only for practical purposes, since name conversions + * are bounds-checked and name data is occasionally stored in-line. */ - #define EFI_VAR_NAME_LEN 1024 int efivars_register(struct efivars *efivars, diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h new file mode 100644 index 000000000000..624ff6ff41f9 --- /dev/null +++ b/include/linux/einj-cxl.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * CXL protocol Error INJection support. + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Ben Cheatham <benjamin.cheatham@amd.com> + */ +#ifndef EINJ_CXL_H +#define EINJ_CXL_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct pci_dev; +struct seq_file; + +#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) +int einj_cxl_available_error_type_show(struct seq_file *m, void *v); +int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type); +int einj_cxl_inject_rch_error(u64 rcrb, u64 type); +bool einj_cxl_is_initialized(void); +#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */ +static inline int einj_cxl_available_error_type_show(struct seq_file *m, + void *v) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type) +{ + return -ENXIO; +} + +static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type) +{ + return -ENXIO; +} + +static inline bool einj_cxl_is_initialized(void) { return false; } +#endif /* CONFIG_ACPI_APEI_EINJ_CXL */ + +#endif /* EINJ_CXL_H */ diff --git a/include/linux/elf.h b/include/linux/elf.h index c9a46c4e183b..5c402788da19 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -65,7 +65,7 @@ extern Elf64_Dyn _DYNAMIC []; struct file; struct coredump_params; -#ifndef ARCH_HAVE_EXTRA_ELF_NOTES +#ifndef CONFIG_ARCH_HAVE_EXTRA_ELF_NOTES static inline int elf_coredump_extra_notes_size(void) { return 0; } static inline int elf_coredump_extra_notes_write(struct coredump_params *cprm) { return 0; } #else diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 88d91e087471..1ff52020cf75 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -5,6 +5,7 @@ #include <linux/device.h> #include <linux/jump_label.h> #include <linux/kobject.h> +#include <linux/kref.h> #include <linux/rcupdate.h> #include <linux/sched/cpufreq.h> #include 
<linux/sched/topology.h> @@ -12,6 +13,7 @@ /** * struct em_perf_state - Performance state of a performance domain + * @performance: CPU performance (capacity) at a given frequency * @frequency: The frequency in KHz, for consistency with CPUFreq * @power: The power consumed at this level (by 1 CPU or by a registered * device). It can be a total power: static and dynamic. @@ -20,6 +22,7 @@ * @flags: see "em_perf_state flags" description below. */ struct em_perf_state { + unsigned long performance; unsigned long frequency; unsigned long power; unsigned long cost; @@ -37,8 +40,20 @@ struct em_perf_state { #define EM_PERF_STATE_INEFFICIENT BIT(0) /** + * struct em_perf_table - Performance states table + * @rcu: RCU used for safe access and destruction + * @kref: Reference counter to track the users + * @state: List of performance states, in ascending order + */ +struct em_perf_table { + struct rcu_head rcu; + struct kref kref; + struct em_perf_state state[]; +}; + +/** * struct em_perf_domain - Performance domain - * @table: List of performance states, in ascending order + * @em_table: Pointer to the runtime modifiable em_perf_table * @nr_perf_states: Number of performance states * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here @@ -53,7 +68,7 @@ struct em_perf_state { * field is unused. */ struct em_perf_domain { - struct em_perf_state *table; + struct em_perf_table __rcu *em_table; int nr_perf_states; unsigned long flags; unsigned long cpus[]; @@ -98,27 +113,6 @@ struct em_perf_domain { #define EM_MAX_NUM_CPUS 16 #endif -/* - * To avoid an overflow on 32bit machines while calculating the energy - * use a different order in the operation. First divide by the 'cpu_scale' - * which would reduce big value stored in the 'cost' field, then multiply by - * the 'sum_util'. This would allow to handle existing platforms, which have - * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. - * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' - * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. - * This reordering of operations has some limitations, we lose small - * precision in the estimation (comparing to 64bit platform w/o reordering). - * - * We are safe on 64bit machine. 
- */ -#ifdef CONFIG_64BIT -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) * (sum_util)) / (scale_cpu)) -#else -#define em_estimate_energy(cost, sum_util, scale_cpu) \ - (((cost) / (scale_cpu)) * (sum_util)) -#endif - struct em_data_callback { /** * active_power() - Provide power at the next performance state of @@ -168,40 +162,49 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table __rcu *table); +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states); +int em_dev_update_chip_binning(struct device *dev); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM - * @pd : Performance domain for which we want an efficient frequency - * @freq : Frequency to map with the EM + * @table: List of performance states, in ascending order + * @nr_perf_states: Number of performance states + * @max_util: Max utilization to map with the EM + * @pd_flags: Performance Domain flags * * It is called from the scheduler code quite frequently and as a consequence * doesn't implement any check. * - * Return: An efficient performance state, high enough to meet @freq + * Return: An efficient performance state id, high enough to meet @max_util * requirement. */ -static inline -struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd, - unsigned long freq) +static inline int +em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states, + unsigned long max_util, unsigned long pd_flags) { struct em_perf_state *ps; int i; - for (i = 0; i < pd->nr_perf_states; i++) { - ps = &pd->table[i]; - if (ps->frequency >= freq) { - if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && + for (i = 0; i < nr_perf_states; i++) { + ps = &table[i]; + if (ps->performance >= max_util) { + if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES && ps->flags & EM_PERF_STATE_INEFFICIENT) continue; - break; + return i; } } - return ps; + return nr_perf_states - 1; } /** @@ -224,9 +227,13 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, unsigned long max_util, unsigned long sum_util, unsigned long allowed_cpu_cap) { - unsigned long freq, ref_freq, scale_cpu; + struct em_perf_table *em_table; struct em_perf_state *ps; - int cpu; + int i; + +#ifdef CONFIG_SCHED_DEBUG + WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); +#endif if (!sum_util) return 0; @@ -234,31 +241,29 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, /* * In order to predict the performance state, map the utilization of * the most utilized CPU of the performance domain to a requested - * frequency, like schedutil. Take also into account that the real - * frequency might be set lower (due to thermal capping). Thus, clamp + * performance, like schedutil. Take also into account that the real + * performance might be set lower (due to thermal capping). Thus, clamp * max utilization to the allowed CPU capacity before calculating - * effective frequency. + * effective performance. 
*/ - cpu = cpumask_first(to_cpumask(pd->cpus)); - scale_cpu = arch_scale_cpu_capacity(cpu); - ref_freq = arch_scale_freq_ref(cpu); - max_util = min(max_util, allowed_cpu_cap); - freq = map_util_freq(max_util, ref_freq, scale_cpu); /* * Find the lowest performance state of the Energy Model above the - * requested frequency. + * requested performance. */ - ps = em_pd_get_efficient_state(pd, freq); + em_table = rcu_dereference(pd->em_table); + i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states, + max_util, pd->flags); + ps = &em_table->state[i]; /* - * The capacity of a CPU in the domain at the performance state (ps) - * can be computed as: + * The performance (capacity) of a CPU in the domain at the performance + * state (ps) can be computed as: * - * ps->freq * scale_cpu - * ps->cap = -------------------- (1) - * cpu_max_freq + * ps->freq * scale_cpu + * ps->performance = -------------------- (1) + * cpu_max_freq * * So, ignoring the costs of idle states (which are not available in * the EM), the energy consumed by this CPU at that performance state @@ -266,9 +271,10 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * * ps->power * cpu_util * cpu_nrg = -------------------- (2) - * ps->cap + * ps->performance * - * since 'cpu_util / ps->cap' represents its percentage of busy time. + * since 'cpu_util / ps->performance' represents its percentage of busy + * time. * * NOTE: Although the result of this computation actually is in * units of power, it can be manipulated as an energy value @@ -278,9 +284,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * By injecting (1) in (2), 'cpu_nrg' can be re-expressed as a product * of two terms: * - * ps->power * cpu_max_freq cpu_util - * cpu_nrg = ------------------------ * --------- (3) - * ps->freq scale_cpu + * ps->power * cpu_max_freq + * cpu_nrg = ------------------------ * cpu_util (3) + * ps->freq * scale_cpu * * The first term is static, and is stored in the em_perf_state struct * as 'ps->cost'. @@ -290,11 +296,9 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * total energy of the domain (which is the simple sum of the energy of * all of its CPUs) can be factorized as: * - * ps->cost * \Sum cpu_util - * pd_nrg = ------------------------ (4) - * scale_cpu + * pd_nrg = ps->cost * \Sum cpu_util (4) */ - return em_estimate_energy(ps->cost, sum_util, scale_cpu); + return ps->cost * sum_util; } /** @@ -309,6 +313,23 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return pd->nr_perf_states; } +/** + * em_perf_state_from_pd() - Get the performance states table of perf. + * domain + * @pd : performance domain for which this must be done + * + * To use this function the rcu_read_lock() should be held. After the usage + * of the performance states table is finished, the rcu_read_unlock() should + * be called.
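Editorial aside: a minimal sketch of the access pattern this kernel-doc mandates, assuming a valid pd obtained from em_cpu_get() (the helper name example_max_power() is illustrative):

	#include <linux/energy_model.h>
	#include <linux/rcupdate.h>

	static unsigned long example_max_power(struct em_perf_domain *pd)
	{
		struct em_perf_state *table;
		unsigned long power;

		rcu_read_lock();
		table = em_perf_state_from_pd(pd);
		/* States are sorted in ascending order; the last one is the highest OPP. */
		power = table[pd->nr_perf_states - 1].power;
		rcu_read_unlock();

		return power;
	}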
+ * + * Return: the pointer to performance states table of the performance domain + */ +static inline +struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) +{ + return rcu_dereference(pd->em_table)->state; +} + #else struct em_data_callback {}; #define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { } @@ -343,6 +364,33 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) { return 0; } +static inline +struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +{ + return NULL; +} +static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline +int em_dev_update_perf_domain(struct device *dev, + struct em_perf_table __rcu *new_table) +{ + return -EINVAL; +} +static inline +struct em_perf_state *em_perf_state_from_pd(struct em_perf_domain *pd) +{ + return NULL; +} +static inline +int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, + int nr_states) +{ + return -EINVAL; +} +static inline int em_dev_update_chip_binning(struct device *dev) +{ + return -EINVAL; +} #endif #endif diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 224645f17c33..2ad1ffa4ccb9 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -71,6 +71,12 @@ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; #define eth_stp_addr eth_reserved_addr_base +static const u8 eth_ipv4_mcast_addr_base[ETH_ALEN] __aligned(2) = +{ 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; + +static const u8 eth_ipv6_mcast_addr_base[ETH_ALEN] __aligned(2) = +{ 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 }; + /** * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address @@ -430,18 +436,16 @@ static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, static inline bool ether_addr_is_ipv4_mcast(const u8 *addr) { - u8 base[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }; u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 }; - return ether_addr_equal_masked(addr, base, mask); + return ether_addr_equal_masked(addr, eth_ipv4_mcast_addr_base, mask); } static inline bool ether_addr_is_ipv6_mcast(const u8 *addr) { - u8 base[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 }; u8 mask[ETH_ALEN] = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }; - return ether_addr_equal_masked(addr, base, mask); + return ether_addr_equal_masked(addr, eth_ipv6_mcast_addr_base, mask); } static inline bool ether_addr_is_ip_mcast(const u8 *addr) @@ -608,6 +612,31 @@ static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr, } /** + * eth_skb_pkt_type - Assign packet type if destination address does not match + * @skb: Assigned a packet type if address does not match @dev address + * @dev: Network device used to compare packet address against + * + * If the destination MAC address of the packet does not match the network + * device address, assign an appropriate packet type. 
+ */ +static inline void eth_skb_pkt_type(struct sk_buff *skb, + const struct net_device *dev) +{ + const struct ethhdr *eth = eth_hdr(skb); + + if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) { + if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { + if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + } else { + skb->pkt_type = PACKET_OTHERHOST; + } + } +} + +/** * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame * @skb: Buffer to pad * diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 325e0778e937..6fd9107d3cc0 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -222,6 +222,16 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +struct ethtool_keee { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); + __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertised); + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_active; + bool eee_enabled; +}; + struct kernel_ethtool_coalesce { u8 use_cqe_mode_tx; u8 use_cqe_mode_rx; @@ -470,6 +480,26 @@ struct ethtool_rmon_stats { ); }; +/** + * struct ethtool_ts_stats - HW timestamping statistics + * @pkts: Number of packets successfully timestamped by the hardware. + * @lost: Number of hardware timestamping requests where the timestamping + * information from the hardware never arrived for submission with + * the skb. + * @err: Number of arbitrary timestamp generation error events that the + * hardware encountered, exclusive of @lost statistics. Cases such + * as resource exhaustion, unavailability, firmware errors, and + * detected illogical timestamp values not submitted with the skb + * are inclusive to this counter. + */ +struct ethtool_ts_stats { + struct_group(tx_stats, + u64 pkts; + u64 lost; + u64 err; + ); +}; + #define ETH_MODULE_EEPROM_PAGE_LEN 128 #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f @@ -745,7 +775,10 @@ struct ethtool_rxfh_param { * @get_ts_info: Get the time stamping and PTP hardware clock capabilities. * It may be called with RCU, or rtnl or reference on the device. * Drivers supporting transmit time stamps in software should set this to - * ethtool_op_get_ts_info(). + * ethtool_op_get_ts_info(). Drivers must not zero statistics which they + * don't report. The stats structure is initialized to ETHTOOL_STAT_NOT_SET + * indicating driver does not report statistics. + * @get_ts_stats: Query the device hardware timestamping statistics. * @get_module_info: Get the size and type of the eeprom contained within * a plug-in module. 
* @get_module_eeprom: Get the eeprom information from the plug-in module @@ -888,12 +921,14 @@ struct ethtool_ops { struct ethtool_dump *, void *); int (*set_dump)(struct net_device *, struct ethtool_dump *); int (*get_ts_info)(struct net_device *, struct ethtool_ts_info *); + void (*get_ts_stats)(struct net_device *dev, + struct ethtool_ts_stats *ts_stats); int (*get_module_info)(struct net_device *, struct ethtool_modinfo *); int (*get_module_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); - int (*get_eee)(struct net_device *, struct ethtool_eee *); - int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_eee)(struct net_device *dev, struct ethtool_keee *eee); + int (*set_eee)(struct net_device *dev, struct ethtool_keee *eee); int (*get_tunable)(struct net_device *, const struct ethtool_tunable *, void *); int (*set_tunable)(struct net_device *, diff --git a/include/linux/evm.h b/include/linux/evm.h index 36ec884320d9..ddece4a6b25d 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -12,52 +12,12 @@ #include <linux/integrity.h> #include <linux/xattr.h> -struct integrity_iint_cache; - #ifdef CONFIG_EVM extern int evm_set_key(void *key, size_t keylen); extern enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint); -extern int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr); -extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid); -extern int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size); -extern void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len); -extern int evm_inode_copy_up_xattr(const char *name); -extern int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *xattr_name); -extern void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name); -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - evm_inode_post_removexattr(dentry, acl_name); -} -extern int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return evm_inode_set_acl(idmap, dentry, acl_name, NULL); -} -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); -} - + size_t xattr_value_len); int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count); @@ -66,6 +26,8 @@ extern int evm_protected_xattr_if_enabled(const char *req_xattr_name); extern int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer, int buffer_size, char type, bool canonical_fmt); +extern bool evm_metadata_changed(struct inode *inode, + struct inode *metadata_inode); #ifdef CONFIG_FS_POSIX_ACL extern int posix_xattr_acl(const char *xattrname); #else @@ -85,85 +47,12 @@ static inline int evm_set_key(void *key, size_t keylen) static inline enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, - size_t xattr_value_len, - struct integrity_iint_cache *iint) 
+ size_t xattr_value_len) { return INTEGRITY_UNKNOWN; } #endif -static inline int evm_inode_setattr(struct mnt_idmap *idmap, - struct dentry *dentry, struct iattr *attr) -{ - return 0; -} - -static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid) -{ - return; -} - -static inline int evm_inode_setxattr(struct mnt_idmap *idmap, - struct dentry *dentry, const char *name, - const void *value, size_t size) -{ - return 0; -} - -static inline void evm_inode_post_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return; -} - -static inline int evm_inode_copy_up_xattr(const char *name) -{ - return 0; -} - -static inline int evm_inode_removexattr(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline void evm_inode_post_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return; -} - -static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return; -} - -static inline int evm_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - return 0; -} - -static inline int evm_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} - -static inline void evm_inode_post_set_acl(struct dentry *dentry, - const char *acl_name, - struct posix_acl *kacl) -{ - return; -} - static inline int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, @@ -189,5 +78,11 @@ static inline int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer, return -EOPNOTSUPP; } +static inline bool evm_metadata_changed(struct inode *inode, + struct inode *metadata_inode) +{ + return false; +} + #endif /* CONFIG_EVM */ #endif /* LINUX_EVM_H */ diff --git a/include/linux/execmem.h b/include/linux/execmem.h new file mode 100644 index 000000000000..32cef1144117 --- /dev/null +++ b/include/linux/execmem.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_EXECMEM_ALLOC_H +#define _LINUX_EXECMEM_ALLOC_H + +#include <linux/types.h> +#include <linux/moduleloader.h> + +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) +#include <linux/kasan.h> +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else +#define MODULE_ALIGN PAGE_SIZE +#endif + +/** + * enum execmem_type - types of executable memory ranges + * + * There are several subsystems that allocate executable memory. + * Architectures define different restrictions on placement, + * permissions, alignment and other parameters for memory that can be used + * by these subsystems. + * Types in this enum identify subsystems that allocate executable memory + * and let architectures define parameters for ranges suitable for + * allocations by each subsystem. + * + * @EXECMEM_DEFAULT: default parameters that would be used for types that + * are not explicitly defined. 
+ * @EXECMEM_MODULE_TEXT: parameters for module text sections + * @EXECMEM_KPROBES: parameters for kprobes + * @EXECMEM_FTRACE: parameters for ftrace + * @EXECMEM_BPF: parameters for BPF + * @EXECMEM_MODULE_DATA: parameters for module data sections + * @EXECMEM_TYPE_MAX: + */ +enum execmem_type { + EXECMEM_DEFAULT, + EXECMEM_MODULE_TEXT = EXECMEM_DEFAULT, + EXECMEM_KPROBES, + EXECMEM_FTRACE, + EXECMEM_BPF, + EXECMEM_MODULE_DATA, + EXECMEM_TYPE_MAX, +}; + +/** + * enum execmem_range_flags - options for executable memory allocations + * @EXECMEM_KASAN_SHADOW: allocate kasan shadow + */ +enum execmem_range_flags { + EXECMEM_KASAN_SHADOW = (1 << 0), +}; + +/** + * struct execmem_range - definition of an address space suitable for code and + * related data allocations + * @start: address space start + * @end: address space end (inclusive) + * @fallback_start: start of the secondary address space range for fallback + * allocations on architectures that require it + * @fallback_end: start of the secondary address space (inclusive) + * @pgprot: permissions for memory in this address space + * @alignment: alignment required for text allocations + * @flags: options for memory allocations for this range + */ +struct execmem_range { + unsigned long start; + unsigned long end; + unsigned long fallback_start; + unsigned long fallback_end; + pgprot_t pgprot; + unsigned int alignment; + enum execmem_range_flags flags; +}; + +/** + * struct execmem_info - architecture parameters for code allocations + * @ranges: array of parameter sets defining architecture specific + * parameters for executable memory allocations. The ranges that are not + * explicitly initialized by an architecture use parameters defined for + * @EXECMEM_DEFAULT. + */ +struct execmem_info { + struct execmem_range ranges[EXECMEM_TYPE_MAX]; +}; + +/** + * execmem_arch_setup - define parameters for allocations of executable memory + * + * A hook for architectures to define parameters for allocations of + * executable memory. These parameters should be filled into the + * @execmem_info structure. + * + * For architectures that do not implement this method a default set of + * parameters will be used + * + * Return: a structure defining architecture parameters and restrictions + * for allocations of executable memory + */ +struct execmem_info *execmem_arch_setup(void); + +/** + * execmem_alloc - allocate executable memory + * @type: type of the allocation + * @size: how many bytes of memory are required + * + * Allocates memory that will contain executable code, either generated or + * loaded from kernel modules. + * + * Allocates memory that will contain data coupled with executable code, + * like data sections in kernel modules. + * + * The memory will have protections defined by architecture for executable + * region of the @type. 
+ * + * Return: a pointer to the allocated memory or %NULL + */ +void *execmem_alloc(enum execmem_type type, size_t size); + +/** + * execmem_free - free executable memory + * @ptr: pointer to the memory that should be freed + */ +void execmem_free(void *ptr); + +#if defined(CONFIG_EXECMEM) && !defined(CONFIG_ARCH_WANTS_EXECMEM_LATE) +void execmem_init(void); +#else +static inline void execmem_init(void) {} +#endif + +#endif /* _LINUX_EXECMEM_ALLOC_H */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 053137a0fe45..41d1d71c36ff 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -27,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 @@ -40,12 +41,6 @@ #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_IO_SIZE(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */ -#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ -#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) -#define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1) - /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) @@ -81,6 +76,7 @@ enum stop_cp_reason { STOP_CP_REASON_CORRUPTED_SUMMARY, STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_MAX, }; @@ -398,7 +394,8 @@ struct f2fs_nat_block { /* * F2FS uses 4 bytes to represent block address. As a result, supported size of - * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. + * disk is 16 TB for a 4K page size and 64 TB for a 16K page size and it equals + * to 16 * 1024 * 1024 / 2 segments. */ #define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) @@ -428,8 +425,10 @@ struct f2fs_sit_block { /* * For segment summary * - * One summary block contains exactly 512 summary entries, which represents - * exactly one segment by default. Not allow to change the basic units. + * One summary block with 4KB size contains exactly 512 summary entries, which + * represents exactly one segment with 2MB size. + * Similarly, in the case of block with 16KB size, it represents one segment with 8MB size. + * Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -560,6 +559,7 @@ typedef __le32 f2fs_hash_t; /* * space utilization of regular dentry and inline dentry (w/o extra reservation) + * when block size is 4KB. 
* regular dentry inline dentry (def) inline dentry (min) * bitmap 1 * 27 = 27 1 * 23 = 23 1 * 1 = 1 * reserved 1 * 3 = 3 1 * 7 = 7 1 * 1 = 1 diff --git a/include/linux/fb.h b/include/linux/fb.h index 05dc9624897d..0f0834939b6a 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -2,28 +2,30 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H -#include <linux/refcount.h> -#include <linux/kgdb.h> #include <uapi/linux/fb.h> #define FBIO_CURSOR _IOWR('F', 0x08, struct fb_cursor_user) -#include <linux/fs.h> -#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/printk.h> +#include <linux/refcount.h> +#include <linux/types.h> #include <linux/workqueue.h> -#include <linux/notifier.h> -#include <linux/list.h> -#include <linux/backlight.h> -#include <linux/slab.h> -#include <asm/fb.h> +#include <asm/video.h> -struct vm_area_struct; -struct fb_info; +struct backlight_device; struct device; +struct device_node; +struct fb_info; struct file; +struct i2c_adapter; +struct inode; +struct module; +struct notifier_block; +struct page; struct videomode; -struct device_node; +struct vm_area_struct; /* Definitions below are used in the parsed monitor specs */ #define FB_DPMS_ACTIVE_OFF 1 @@ -143,9 +145,13 @@ struct fb_event { void *data; }; +/* Enough for the VT console needs, see its max_font_width/height */ +#define FB_MAX_BLIT_WIDTH 64 +#define FB_MAX_BLIT_HEIGHT 128 + struct fb_blit_caps { - u32 x; - u32 y; + DECLARE_BITMAP(x, FB_MAX_BLIT_WIDTH); + DECLARE_BITMAP(y, FB_MAX_BLIT_HEIGHT); u32 len; u32 flags; }; @@ -192,10 +198,12 @@ struct fb_pixmap { u32 scan_align; /* alignment per scanline */ u32 access_align; /* alignment per read/write (bits) */ u32 flags; /* see FB_PIXMAP_* */ - u32 blit_x; /* supported bit block dimensions (1-32)*/ - u32 blit_y; /* Format: blit_x = 1 << (width - 1) */ - /* blit_y = 1 << (height - 1) */ - /* if 0, will be set to 0xffffffff (all)*/ + /* supported bit block dimensions */ + /* Format: test_bit(width - 1, blit_x) */ + /* test_bit(height - 1, blit_y) */ + /* if zero, will be set to full (all) */ + DECLARE_BITMAP(blit_x, FB_MAX_BLIT_WIDTH); + DECLARE_BITMAP(blit_y, FB_MAX_BLIT_HEIGHT); /* access methods */ void (*writeio)(struct fb_info *info, void __iomem *dst, void *src, unsigned int size); void (*readio) (struct fb_info *info, void *dst, void __iomem *src, unsigned int size); @@ -686,6 +694,10 @@ extern int fb_deferred_io_fsync(struct file *file, loff_t start, __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) +#define FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(__prefix, __damage_range, __damage_area) \ + __FB_GEN_DEFAULT_DEFERRED_OPS_RDWR(__prefix, __damage_range, sys) \ + __FB_GEN_DEFAULT_DEFERRED_OPS_DRAW(__prefix, __damage_area, sys) + /* * Initializes struct fb_ops for deferred I/O. 
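Editorial aside: the new FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS() mirrors the existing SYSMEM variant for drivers whose shadow buffer lives in DMA-able system memory. A usage sketch in the style of the in-tree users (the damage helpers named here are DRM's fbdev-emulation callbacks, cited from memory, and the example_fb prefix is illustrative):

	FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(example_fb,
					   drm_fb_helper_damage_range,
					   drm_fb_helper_damage_area);

The macro generates the sys_*()-based read/write and drawing wrappers for the named prefix, routing damage reports to the two callbacks given as arguments.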
*/ @@ -730,6 +742,15 @@ extern struct fb_info *framebuffer_alloc(size_t size, struct device *dev); extern void framebuffer_release(struct fb_info *info); extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max); +#if IS_ENABLED(CONFIG_FB_BACKLIGHT) +struct backlight_device *fb_bl_device(struct fb_info *info); +#else +static inline struct backlight_device *fb_bl_device(struct fb_info *info) +{ + return NULL; +} +#endif + /* fbmon.c */ #define FB_MAXTIMINGS 0 #define FB_VSYNCTIMINGS 1 @@ -840,14 +861,7 @@ extern int fb_find_mode(struct fb_var_screeninfo *var, const struct fb_videomode *default_mode, unsigned int default_bpp); -#if defined(CONFIG_VIDEO_NOMODESET) bool fb_modesetting_disabled(const char *drvname); -#else -static inline bool fb_modesetting_disabled(const char *drvname) -{ - return false; -} -#endif /* * Convenience logging macros diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 78c8326d74ae..2944d4aa413b 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -33,16 +33,6 @@ struct fdtable { struct rcu_head rcu; }; -static inline bool close_on_exec(unsigned int fd, const struct fdtable *fdt) -{ - return test_bit(fd, fdt->close_on_exec); -} - -static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt) -{ - return test_bit(fd, fdt->open_fds); -} - /* * Open file table structure */ @@ -107,6 +97,11 @@ struct file *lookup_fdget_rcu(unsigned int fd); struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd); struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd); +static inline bool close_on_exec(unsigned int fd, const struct files_struct *files) +{ + return test_bit(fd, files_fdtable(files)->close_on_exec); +} + struct task_struct; void put_files_struct(struct files_struct *fs); diff --git a/include/linux/file.h b/include/linux/file.h index 6834a29338c4..45d0f4800abd 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -24,6 +24,8 @@ struct inode; struct path; extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *, const char *, int flags, const struct file_operations *); +extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount *, + const char *, int flags, const struct file_operations *); extern struct file *alloc_file_clone(struct file *, int flags, const struct file_operations *); @@ -82,6 +84,7 @@ static inline void fdput_pos(struct fd f) } DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd) +DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd) extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 95e868e09e29..daee999d05f3 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -27,6 +27,7 @@ #define FILE_LOCK_DEFERRED 1 struct file_lock; +struct file_lease; struct file_lock_operations { void (*fl_copy_lock)(struct file_lock *, struct file_lock *); @@ -39,14 +40,17 @@ struct lock_manager_operations { void (*lm_put_owner)(fl_owner_t); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); - bool (*lm_break)(struct file_lock *); - int (*lm_change)(struct file_lock *, int, struct list_head *); - void (*lm_setup)(struct file_lock *, void **); - bool (*lm_breaker_owns_lease)(struct file_lock *); bool (*lm_lock_expirable)(struct file_lock *cfl); void 
(*lm_expire_lock)(void); }; +struct lease_manager_operations { + bool (*lm_break)(struct file_lease *); + int (*lm_change)(struct file_lease *, int, struct list_head *); + void (*lm_setup)(struct file_lease *, void **); + bool (*lm_breaker_owns_lease)(struct file_lease *); +}; + struct lock_manager { struct list_head list; /* @@ -85,31 +89,31 @@ bool opens_in_grace(struct net *); * * Obviously, the last two criteria only matter for POSIX locks. */ -struct file_lock { - struct file_lock *fl_blocker; /* The lock, that is blocking us */ - struct list_head fl_list; /* link into file_lock_context */ - struct hlist_node fl_link; /* node in global lists */ - struct list_head fl_blocked_requests; /* list of requests with + +struct file_lock_core { + struct file_lock_core *flc_blocker; /* The lock that is blocking us */ + struct list_head flc_list; /* link into file_lock_context */ + struct hlist_node flc_link; /* node in global lists */ + struct list_head flc_blocked_requests; /* list of requests with * ->fl_blocker pointing here */ - struct list_head fl_blocked_member; /* node in + struct list_head flc_blocked_member; /* node in * ->fl_blocker->fl_blocked_requests */ - fl_owner_t fl_owner; - unsigned int fl_flags; - unsigned char fl_type; - unsigned int fl_pid; - int fl_link_cpu; /* what cpu's list is this on? */ - wait_queue_head_t fl_wait; - struct file *fl_file; + fl_owner_t flc_owner; + unsigned int flc_flags; + unsigned char flc_type; + pid_t flc_pid; + int flc_link_cpu; /* what cpu's list is this on? */ + wait_queue_head_t flc_wait; + struct file *flc_file; +}; + +struct file_lock { + struct file_lock_core c; loff_t fl_start; loff_t fl_end; - struct fasync_struct * fl_fasync; /* for lease break notifications */ - /* for lease breaks: */ - unsigned long fl_break_time; - unsigned long fl_downgrade_time; - const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { @@ -126,6 +130,15 @@ struct file_lock { } fl_u; } __randomize_layout; +struct file_lease { + struct file_lock_core c; + struct fasync_struct * fl_fasync; /* for lease break notifications */ + /* for lease breaks: */ + unsigned long fl_break_time; + unsigned long fl_downgrade_time; + const struct lease_manager_operations *fl_lmops; /* Callbacks for lease managers */ +} __randomize_layout; + struct file_lock_context { spinlock_t flc_lock; struct list_head flc_flock; @@ -147,11 +160,31 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return fl->c.flc_type == F_UNLCK; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return fl->c.flc_type == F_RDLCK; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return fl->c.flc_type == F_WRLCK; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ + wake_up(&fl->c.flc_wait); +} + /* fs/locks.c */ void locks_free_lock_context(struct inode *inode); void locks_free_lock(struct file_lock *fl); void locks_init_lock(struct file_lock *); -struct file_lock * locks_alloc_lock(void); +struct file_lock *locks_alloc_lock(void); void locks_copy_lock(struct file_lock *, struct file_lock *); void locks_copy_conflock(struct file_lock *, struct file_lock *); void locks_remove_posix(struct file *, fl_owner_t); @@ -165,11 +198,16 @@ int vfs_lock_file(struct file *, unsigned int, struct file_lock 
*, struct file_l int vfs_cancel_lock(struct file *filp, struct file_lock *fl); bool vfs_inode_has_locks(struct inode *inode); int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + +void locks_init_lease(struct file_lease *); +void locks_free_lease(struct file_lease *fl); +struct file_lease *locks_alloc_lease(void); int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); void lease_get_mtime(struct inode *, struct timespec64 *time); -int generic_setlease(struct file *, int, struct file_lock **, void **priv); -int vfs_setlease(struct file *, int, struct file_lock **, void **); -int lease_modify(struct file_lock *, int, struct list_head *); +int generic_setlease(struct file *, int, struct file_lease **, void **priv); +int kernel_setlease(struct file *, int, struct file_lease **, void **); +int vfs_setlease(struct file *, int, struct file_lease **, void **); +int lease_modify(struct file_lease *, int, struct list_head *); struct notifier_block; int lease_register_notifier(struct notifier_block *); @@ -223,6 +261,25 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline bool lock_is_unlock(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_read(struct file_lock *fl) +{ + return false; +} + +static inline bool lock_is_write(struct file_lock *fl) +{ + return false; +} + +static inline void locks_wake_up(struct file_lock *fl) +{ +} + static inline void locks_free_lock_context(struct inode *inode) { @@ -233,6 +290,11 @@ static inline void locks_init_lock(struct file_lock *fl) return; } +static inline void locks_init_lease(struct file_lease *fl) +{ + return; +} + static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; @@ -307,18 +369,24 @@ static inline void lease_get_mtime(struct inode *inode, } static inline int generic_setlease(struct file *filp, int arg, - struct file_lock **flp, void **priv) + struct file_lease **flp, void **priv) +{ + return -EINVAL; +} + +static inline int kernel_setlease(struct file *filp, int arg, + struct file_lease **lease, void **priv) { return -EINVAL; } static inline int vfs_setlease(struct file *filp, int arg, - struct file_lock **lease, void **priv) + struct file_lease **lease, void **priv) { return -EINVAL; } -static inline int lease_modify(struct file_lock *fl, int arg, +static inline int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { return -EINVAL; @@ -341,6 +409,9 @@ locks_inode_context(const struct inode *inode) #endif /* !CONFIG_FILE_LOCKING */ +/* for walking lists of file_locks linked by fl_list */ +#define for_each_file_lock(_fl, _head) list_for_each_entry(_fl, _head, c.flc_list) + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); diff --git a/include/linux/filter.h b/include/linux/filter.h index 68fb6c8142fe..0f12cf01070e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -72,6 +72,12 @@ struct ctl_table_header; /* unused opcode to mark special ldsx instruction. Same as BPF_IND */ #define BPF_PROBE_MEMSX 0x40 +/* unused opcode to mark special load instruction. 
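To make the file_lock/file_lease split above concrete, here is an illustrative consumer (function name hypothetical) that goes through the embedded file_lock_core and the new helpers instead of the old fl_* fields:

static void example_show_lock(struct file_lock *fl)
{
	if (lock_is_unlock(fl))		/* fl->c.flc_type == F_UNLCK */
		return;
	pr_info("%s lock, pid %d, range [%lld, %lld]\n",
		lock_is_write(fl) ? "write" : "read",
		fl->c.flc_pid, fl->fl_start, fl->fl_end);
}

/* walking a context list likewise goes through the embedded core:
 *	for_each_file_lock(fl, &ctx->flc_posix)
 *		example_show_lock(fl);
 */

Lease-only state (fl_break_time, fl_fasync, the lease_manager_operations) now lives in struct file_lease and is no longer carried by every byte-range lock.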
Same as BPF_MSH */ +#define BPF_PROBE_MEM32 0xa0 + +/* unused opcode to mark special atomic instruction */ +#define BPF_PROBE_ATOMIC 0xe0 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 @@ -175,6 +181,25 @@ struct ctl_table_header; .off = 0, \ .imm = 0 }) +/* Special (internal-only) form of mov, used to resolve per-CPU addrs: + * dst_reg = src_reg + <percpu_base_off> + * BPF_ADDR_PERCPU is used as a special insn->off value. + */ +#define BPF_ADDR_PERCPU (-1) + +#define BPF_MOV64_PERCPU_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = BPF_ADDR_PERCPU, \ + .imm = 0 }) + +static inline bool insn_is_mov_percpu_addr(const struct bpf_insn *insn) +{ + return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU; +} + /* Short form of mov, dst_reg = imm32 */ #define BPF_MOV64_IMM(DST, IMM) \ @@ -225,6 +250,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) return insn->code == (BPF_ALU | BPF_MOV | BPF_X) && insn->imm == 1; } +/* addr_space_cast from as(0) to as(1) is for converting bpf arena pointers + * to pointers in user vma. + */ +static inline bool insn_is_cast_user(const struct bpf_insn *insn) +{ + return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn->off == BPF_ADDR_SPACE_CAST && + insn->imm == 1U << 16; +} + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) @@ -547,24 +582,27 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ u64, __ur_3, u64, __ur_4, u64, __ur_5) -#define BPF_CALL_x(x, name, ...) \ +#define BPF_CALL_x(x, attr, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ - u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + attr u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) -#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) -#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) -#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) -#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) -#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) -#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) +#define __NOATTR +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, __NOATTR, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, __NOATTR, name, __VA_ARGS__) + +#define NOTRACE_BPF_CALL_1(name, ...) BPF_CALL_x(1, notrace, name, __VA_ARGS__) #define bpf_ctx_range(TYPE, MEMBER) \ offsetof(TYPE, MEMBER) ... 
offsetofend(TYPE, MEMBER) - 1 @@ -638,14 +676,16 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog, cant_migrate(); if (static_branch_unlikely(&bpf_stats_enabled_key)) { struct bpf_prog_stats *stats; - u64 start = sched_clock(); + u64 duration, start = sched_clock(); unsigned long flags; ret = dfunc(ctx, prog->insnsi, prog->bpf_func); + + duration = sched_clock() - start; stats = this_cpu_ptr(prog->stats); flags = u64_stats_update_begin_irqsave(&stats->syncp); u64_stats_inc(&stats->cnt); - u64_stats_add(&stats->nsecs, sched_clock() - start); + u64_stats_add(&stats->nsecs, duration); u64_stats_update_end_irqrestore(&stats->syncp, flags); } else { ret = dfunc(ctx, prog->insnsi, prog->bpf_func); @@ -881,20 +921,22 @@ bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) -static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +static inline int __must_check bpf_prog_lock_ro(struct bpf_prog *fp) { #ifndef CONFIG_BPF_JIT_ALWAYS_ON if (!fp->jited) { set_vm_flush_reset_perms(fp); - set_memory_ro((unsigned long)fp, fp->pages); + return set_memory_ro((unsigned long)fp, fp->pages); } #endif + return 0; } -static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) +static inline int __must_check +bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { set_vm_flush_reset_perms(hdr); - set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); + return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT); } int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); @@ -951,10 +993,16 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_inlines_helper_call(s32 imm); bool bpf_jit_supports_subprog_tailcalls(void); +bool bpf_jit_supports_percpu_insn(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); bool bpf_jit_supports_exceptions(void); +bool bpf_jit_supports_ptr_xchg(void); +bool bpf_jit_supports_arena(void); +bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); +u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); bool bpf_helper_changes_pkt_data(void *func); @@ -1139,7 +1187,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog) return false; if (!bpf_jit_harden) return false; - if (bpf_jit_harden == 1 && bpf_capable()) + if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF)) return false; return true; diff --git a/include/linux/find.h b/include/linux/find.h index 5e4f39ef2e72..5dfca4225fef 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -29,6 +29,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsign unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); +unsigned long _find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, + const unsigned long *addr3, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); @@ -220,7 +222,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) * idx = find_first_bit(addr, size); * 
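bpf_prog_lock_ro() and bpf_jit_binary_lock_ro() above now propagate set_memory_ro()/set_memory_rox() failures instead of silently ignoring them. A minimal sketch (function name hypothetical) of the calling pattern the __must_check annotation forces:

static int example_seal_prog(struct bpf_prog *fp)
{
	int err;

	err = bpf_prog_lock_ro(fp);	/* may fail to seal the image R/O */
	if (err)
		return err;	/* caller must unwind, not run the prog */
	return 0;
}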
* Returns the bit number of the N'th set bit. - * If no such, returns @size. + * If no such, returns >= @size. */ static inline unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n) @@ -345,6 +347,31 @@ unsigned long find_first_and_bit(const unsigned long *addr1, } #endif +/** + * find_first_and_and_bit - find the first set bit in 3 memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @addr3: The third address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the first set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + const unsigned long *addr3, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & *addr3 & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_and_and_bit(addr1, addr2, addr3, size); +} + #ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region @@ -405,7 +432,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, { unsigned long bit = find_next_and_bit(addr1, addr2, size, offset); - if (bit < size) + if (bit < size || offset == 0) return bit; bit = find_first_and_bit(addr1, addr2, offset); @@ -413,8 +440,8 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1, } /** - * find_next_bit_wrap - find the next set bit in both memory regions - * @addr: The first address to base the search on + * find_next_bit_wrap - find the next set bit in a memory region + * @addr: The address to base the search on * @size: The bitmap size in bits * @offset: The bitnumber to start searching at * @@ -427,7 +454,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr, { unsigned long bit = find_next_bit(addr, size, offset); - if (bit < size) + if (bit < size || offset == 0) return bit; bit = find_first_bit(addr, offset); diff --git a/include/linux/firewire.h b/include/linux/firewire.h index dd9f2d765e68..00abe0e5d602 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -463,7 +463,8 @@ struct fw_iso_packet { u32 tag:2; /* tx: Tag in packet header */ u32 sy:4; /* tx: Sy in packet header */ u32 header_length:8; /* Length of immediate header */ - u32 header[]; /* tx: Top of 1394 isoch. data_block */ + /* tx: Top of 1394 isoch. 
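The new find_first_and_and_bit() above is the three-mask analogue of find_first_and_bit(). An illustrative use (idle_mask is hypothetical) is selecting the first CPU that meets three conditions in one pass, without materializing an intermediate mask:

static int example_pick_cpu(struct task_struct *p,
			    const struct cpumask *idle_mask)
{
	unsigned long cpu;

	cpu = find_first_and_and_bit(cpumask_bits(cpu_online_mask),
				     cpumask_bits(p->cpus_ptr),
				     cpumask_bits(idle_mask),
				     nr_cpu_ids);
	if (cpu >= nr_cpu_ids)
		return -EBUSY;	/* no bit set in all three masks */
	return cpu;
}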
data_block */ + u32 header[] __counted_by(header_length); }; #define FW_ISO_CONTEXT_TRANSMIT 0 diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 0311858b46ce..f026f8926d79 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <linux/cleanup.h> #include <linux/gfp.h> #define FW_ACTION_NOUEVENT 0 @@ -198,4 +199,6 @@ static inline void firmware_upload_unregister(struct fw_upload *fw_upload) int firmware_request_cache(struct device *device, const char *name); +DEFINE_FREE(firmware, struct firmware *, release_firmware(_T)) + #endif diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 29cd11d5a3cf..82687e07a7c2 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -123,7 +123,6 @@ struct cs_dsp_client_ops; * @sysclk_mask: Mask of frequency bits within sysclk register (ADSP1 only) * @sysclk_shift: Shift of frequency bits within sysclk register (ADSP1 only) * @alg_regions: List of currently loaded algorithm regions - * @fw_file_name: Filename of the current firmware * @fw_name: Name of the current firmware * @fw_id: ID of the current firmware, obtained from the wmfw * @fw_id_version: Version of the firmware, obtained from the wmfw @@ -239,8 +238,12 @@ void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp); int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int event_id); int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, const void *buf, size_t len); +int cs_dsp_coeff_lock_and_write_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, + const void *buf, size_t len); int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, void *buf, size_t len); +int cs_dsp_coeff_lock_and_read_ctrl(struct cs_dsp_coeff_ctl *ctl, unsigned int off, + void *buf, size_t len); struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp, const char *name, int type, unsigned int alg); diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 5c28298a98be..366243ee9609 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -10,6 +10,7 @@ #define __QCOM_QSEECOM_H #include <linux/auxiliary_bus.h> +#include <linux/dma-mapping.h> #include <linux/types.h> #include <linux/firmware/qcom/qcom_scm.h> @@ -25,11 +26,56 @@ struct qseecom_client { }; /** + * qseecom_scm_dev() - Get the SCM device associated with the QSEECOM client. + * @client: The QSEECOM client device. + * + * Returns the SCM device under which the provided QSEECOM client device + * operates. This function is intended to be used for DMA allocations. + */ +static inline struct device *qseecom_scm_dev(struct qseecom_client *client) +{ + return client->aux_dev.dev.parent->parent; +} + +/** + * qseecom_dma_alloc() - Allocate DMA memory for a QSEECOM client. + * @client: The QSEECOM client to allocate the memory for. + * @size: The number of bytes to allocate. + * @dma_handle: Pointer to where the DMA address should be stored. + * @gfp: Allocation flags. + * + * Wrapper function for dma_alloc_coherent(), allocating DMA memory usable for + * TZ/QSEECOM communication. Refer to dma_alloc_coherent() for details. 
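The DEFINE_FREE(firmware, ...) class above hooks release_firmware() into the scope-based cleanup machinery of <linux/cleanup.h>. A minimal sketch of the resulting pattern (function and firmware name hypothetical):

static int example_load_fw(struct device *dev)
{
	const struct firmware *fw __free(firmware) = NULL;
	int err;

	err = request_firmware(&fw, "example.bin", dev);
	if (err)
		return err;	/* fw is still NULL, cleanup is a no-op */

	/* ... consume fw->data / fw->size ... */
	return 0;	/* release_firmware() runs on scope exit */
}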
+ */ +static inline void *qseecom_dma_alloc(struct qseecom_client *client, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + return dma_alloc_coherent(qseecom_scm_dev(client), size, dma_handle, gfp); +} + +/** + * qseecom_dma_free() - Free QSEECOM DMA memory. + * @client: The QSEECOM client for which the memory has been allocated. + * @size: The number of bytes allocated. + * @cpu_addr: Virtual memory address to free. + * @dma_handle: DMA memory address to free. + * + * Wrapper function for dma_free_coherent(), freeing memory previously + * allocated with qseecom_dma_alloc(). Refer to dma_free_coherent() for + * details. + */ +static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_coherent(qseecom_scm_dev(client), size, cpu_addr, dma_handle); +} + +/** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. * @client: The QSEECOM client associated with the target app. - * @req: Request buffer sent to the app (must be DMA-mappable). + * @req: DMA address of the request buffer sent to the app. * @req_size: Size of the request buffer. - * @rsp: Response buffer, written to by the app (must be DMA-mappable). + * @rsp: DMA address of the response buffer, written to by the app. * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given client and read @@ -43,8 +89,9 @@ struct qseecom_client { * * Return: Zero on success, nonzero on failure. */ -static inline int qcom_qseecom_app_send(struct qseecom_client *client, void *req, size_t req_size, - void *rsp, size_t rsp_size) +static inline int qcom_qseecom_app_send(struct qseecom_client *client, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); } diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index ccaf28846054..aaa19f93ac43 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -118,8 +118,8 @@ bool qcom_scm_lmh_dcvsh_available(void); #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, void *rsp, - size_t rsp_size); +int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size); #else /* CONFIG_QCOM_QSEECOM */ @@ -128,9 +128,9 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) { return -EINVAL; } -static inline int qcom_scm_qseecom_app_send(u32 app_id, void *req, - size_t req_size, void *rsp, - size_t rsp_size) +static inline int qcom_scm_qseecom_app_send(u32 app_id, + dma_addr_t req, size_t req_size, + dma_addr_t rsp, size_t rsp_size) { return -EINVAL; } diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 9a7e52739251..1a069a56c961 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -3,6 +3,7 @@ * Xilinx Zynq MPSoC Firmware layer * * Copyright (C) 2014-2021 Xilinx + * Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc.
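Putting the reworked QSEECOM interface together: a client now allocates both message buffers through the SCM device and identifies them to the app by DMA address. An illustrative call sequence (function name and error policy hypothetical):

static int example_app_call(struct qseecom_client *client,
			    size_t req_size, size_t rsp_size)
{
	dma_addr_t req_dma, rsp_dma;
	void *req, *rsp;
	int ret;

	req = qseecom_dma_alloc(client, req_size, &req_dma, GFP_KERNEL);
	if (!req)
		return -ENOMEM;
	rsp = qseecom_dma_alloc(client, rsp_size, &rsp_dma, GFP_KERNEL);
	if (!rsp) {
		qseecom_dma_free(client, req_size, req, req_dma);
		return -ENOMEM;
	}

	/* ... fill *req ..., then pass both buffers by DMA address: */
	ret = qcom_qseecom_app_send(client, req_dma, req_size,
				    rsp_dma, rsp_size);
	/* ... on success, parse *rsp ... */

	qseecom_dma_free(client, rsp_size, rsp, rsp_dma);
	qseecom_dma_free(client, req_size, req, req_dma);
	return ret;
}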
* * Michal Simek <michal.simek@amd.com> * Davorin Mista <davorin.mista@aggios.com> @@ -171,6 +172,7 @@ enum pm_api_id { PM_CLOCK_GETPARENT = 44, PM_FPGA_READ = 46, PM_SECURE_AES = 47, + PM_EFUSE_ACCESS = 53, PM_FEATURE_CHECK = 63, }; @@ -562,6 +564,7 @@ int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities, const u32 qos, const enum zynqmp_pm_request_ack ack); int zynqmp_pm_aes_engine(const u64 address, u32 *out); +int zynqmp_pm_efuse_access(const u64 address, u32 *out); int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_load(const u64 address, const u32 size, const u32 flags); int zynqmp_pm_fpga_get_status(u32 *value); @@ -749,6 +752,11 @@ static inline int zynqmp_pm_aes_engine(const u64 address, u32 *out) return -ENODEV; } +static inline int zynqmp_pm_efuse_access(const u64 address, u32 *out) +{ + return -ENODEV; +} + static inline int zynqmp_pm_sha_hash(const u64 address, const u32 size, const u32 flags) { diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 3e378b1fb0bc..e9a72fd0bfe7 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -39,38 +39,6 @@ void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); /* - * ---- SINGLE ---- - */ -struct fprop_local_single { - /* the local events counter */ - unsigned long events; - /* Period in which we last updated events */ - unsigned int period; - raw_spinlock_t lock; /* Protect period and numerator */ -}; - -#define INIT_FPROP_LOCAL_SINGLE(name) \ -{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ -} - -int fprop_local_init_single(struct fprop_local_single *pl); -void fprop_local_destroy_single(struct fprop_local_single *pl); -void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl); -void fprop_fraction_single(struct fprop_global *p, - struct fprop_local_single *pl, unsigned long *numerator, - unsigned long *denominator); - -static inline -void fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl) -{ - unsigned long flags; - - local_irq_save(flags); - __fprop_inc_single(p, pl); - local_irq_restore(flags); -} - -/* * ---- PERCPU ---- */ struct fprop_local_percpu { diff --git a/include/linux/font.h b/include/linux/font.h index abf1442ce719..81caffd51bb4 100644 --- a/include/linux/font.h +++ b/include/linux/font.h @@ -57,7 +57,8 @@ extern const struct font_desc *find_font(const char *name); /* Get the default font for a specific screen size */ extern const struct font_desc *get_default_font(int xres, int yres, - u32 font_w, u32 font_h); + unsigned long *font_w, + unsigned long *font_h); /* Max. 
length for the name of a predefined font */ #define MAX_FONT_NAME 32 diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 89a6888f2f9e..7e0f340bf363 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FORTIFY_STRING_H_ #define _LINUX_FORTIFY_STRING_H_ +#include <linux/bitfield.h> #include <linux/bug.h> #include <linux/const.h> #include <linux/limits.h> @@ -9,7 +10,50 @@ #define __FORTIFY_INLINE extern __always_inline __gnu_inline __overloadable #define __RENAME(x) __asm__(#x) -void fortify_panic(const char *name) __noreturn __cold; +#define FORTIFY_REASON_DIR(r) FIELD_GET(BIT(0), r) +#define FORTIFY_REASON_FUNC(r) FIELD_GET(GENMASK(7, 1), r) +#define FORTIFY_REASON(func, write) (FIELD_PREP(BIT(0), write) | \ + FIELD_PREP(GENMASK(7, 1), func)) + +/* Overridden by KUnit tests. */ +#ifndef fortify_panic +# define fortify_panic(func, write, avail, size, retfail) \ + __fortify_panic(FORTIFY_REASON(func, write), avail, size) +#endif +#ifndef fortify_warn_once +# define fortify_warn_once(x...) WARN_ONCE(x) +#endif + +#define FORTIFY_READ 0 +#define FORTIFY_WRITE 1 + +#define EACH_FORTIFY_FUNC(macro) \ + macro(strncpy), \ + macro(strnlen), \ + macro(strlen), \ + macro(strscpy), \ + macro(strlcat), \ + macro(strcat), \ + macro(strncat), \ + macro(memset), \ + macro(memcpy), \ + macro(memmove), \ + macro(memscan), \ + macro(memcmp), \ + macro(memchr), \ + macro(memchr_inv), \ + macro(kmemdup), \ + macro(strcpy), \ + macro(UNKNOWN), + +#define MAKE_FORTIFY_FUNC(func) FORTIFY_FUNC_##func + +enum fortify_func { + EACH_FORTIFY_FUNC(MAKE_FORTIFY_FUNC) +}; + +void __fortify_report(const u8 reason, const size_t avail, const size_t size); +void __fortify_panic(const u8 reason, const size_t avail, const size_t size) __cold __noreturn; void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)"); void __read_overflow2_field(size_t avail, size_t wanted) __compiletime_warning("detected read beyond size of field (2nd parameter); maybe use struct_group()?"); @@ -31,17 +75,30 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" __ret; \ }) -#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) +#if defined(__SANITIZE_ADDRESS__) + +#if !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX) && !defined(CONFIG_GENERIC_ENTRY) +extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset); +extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove); +extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); +#elif defined(CONFIG_KASAN_GENERIC) +extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(__asan_memset); +extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(__asan_memmove); +extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(__asan_memcpy); +#else /* CONFIG_KASAN_SW_TAGS */ +extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(__hwasan_memset); +extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(__hwasan_memmove); +extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(__hwasan_memcpy); +#endif + extern void *__underlying_memchr(const void *p, 
int c, __kernel_size_t size) __RENAME(memchr); extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp); -extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy); -extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove); -extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset); extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat); extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy); extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen); extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat); extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy); + #else #if defined(__SANITIZE_MEMORY__) @@ -66,6 +123,7 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) #define __underlying_strlen __builtin_strlen #define __underlying_strncat __builtin_strncat #define __underlying_strncpy __builtin_strncpy + #endif /** @@ -143,7 +201,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __write_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strncpy, FORTIFY_WRITE, p_size, size, p); return __underlying_strncpy(p, q, size); } @@ -174,7 +232,7 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size /* Do not check characters beyond the end of p. */ ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); if (p_size <= ret && maxlen != ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strnlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } @@ -210,31 +268,13 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return __underlying_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlen, FORTIFY_READ, p_size, ret + 1, ret); return ret; } /* Defined after fortified strnlen() to reuse it. */ -extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); -/** - * strscpy - Copy a C-string into a sized buffer - * - * @p: Where to copy the string to - * @q: Where to copy the string from - * @size: Size of destination buffer - * - * Copy the source string @q, or as much of it as fits, into the destination - * @p buffer. The behavior is undefined if the string buffers overlap. The - * destination @p buffer is always NUL terminated, unless it's zero-sized. - * - * Preferred to strncpy() since it always returns a valid string, and - * doesn't unnecessarily force the tail of the destination buffer to be - * zero padded. If padding is desired please use strscpy_pad(). - * - * Returns the number of characters copied in @p (not including the - * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated. - */ -__FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) +extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(sized_strscpy); +__FORTIFY_INLINE ssize_t sized_strscpy(char * const POS p, const char * const POS q, size_t size) { /* Use string size rather than possible enclosing struct size. 
*/ const size_t p_size = __member_size(p); @@ -278,8 +318,8 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s * Generate a runtime write overflow error if len is greater than * p_size. */ - if (len > p_size) - fortify_panic(__func__); + if (p_size < len) + fortify_panic(FORTIFY_FUNC_strscpy, FORTIFY_WRITE, p_size, len, -E2BIG); /* * We can now safely call vanilla strscpy because we are protected from: @@ -337,7 +377,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if string is already overflowed. */ if (p_size <= p_len) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_READ, p_size, p_len + 1, wanted); if (actual >= avail) { copy_len = avail - p_len - 1; @@ -346,7 +386,7 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) /* Give up if copy will overflow. */ if (p_size <= actual) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strlcat, FORTIFY_WRITE, p_size, actual + 1, wanted); __underlying_memcpy(p + p_len, q, copy_len); p[actual] = '\0'; @@ -373,9 +413,10 @@ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { const size_t p_size = __member_size(p); + const size_t wanted = strlcat(p, q, p_size); - if (strlcat(p, q, p_size) >= p_size) - fortify_panic(__func__); + if (p_size <= wanted) + fortify_panic(FORTIFY_FUNC_strcat, FORTIFY_WRITE, p_size, wanted + 1, p); return p; } @@ -404,20 +445,21 @@ char *strncat(char * const POS p, const char * const POS q, __kernel_size_t coun { const size_t p_size = __member_size(p); const size_t q_size = __member_size(q); - size_t p_len, copy_len; + size_t p_len, copy_len, total; if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); p_len = strlen(p); copy_len = strnlen(q, count); - if (p_size < p_len + copy_len + 1) - fortify_panic(__func__); + total = p_len + copy_len + 1; + if (p_size < total) + fortify_panic(FORTIFY_FUNC_strncat, FORTIFY_WRITE, p_size, total, p); __underlying_memcpy(p + p_len, q, copy_len); p[p_len + copy_len] = '\0'; return p; } -__FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, +__FORTIFY_INLINE bool fortify_memset_chk(__kernel_size_t size, const size_t p_size, const size_t p_size_field) { @@ -452,7 +494,8 @@ __FORTIFY_INLINE void fortify_memset_chk(__kernel_size_t size, * lengths are unknown.) */ if (p_size != SIZE_MAX && p_size < size) - fortify_panic("memset"); + fortify_panic(FORTIFY_FUNC_memset, FORTIFY_WRITE, p_size, size, true); + return false; } #define __fortify_memset_chk(p, c, size, p_size, p_size_field) ({ \ @@ -506,7 +549,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t q_size, const size_t p_size_field, const size_t q_size_field, - const char *func) + const u8 func) { if (__builtin_constant_p(size)) { /* @@ -550,9 +593,10 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, * (The SIZE_MAX test is to optimize away checks where the buffer * lengths are unknown.) */ - if ((p_size != SIZE_MAX && p_size < size) || - (q_size != SIZE_MAX && q_size < size)) - fortify_panic(func); + if (p_size != SIZE_MAX && p_size < size) + fortify_panic(func, FORTIFY_WRITE, p_size, size, true); + else if (q_size != SIZE_MAX && q_size < size) + fortify_panic(func, FORTIFY_READ, p_size, size, true); /* * Warn when writing beyond destination field size. 
@@ -583,9 +627,9 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, const size_t __q_size = (q_size); \ const size_t __p_size_field = (p_size_field); \ const size_t __q_size_field = (q_size_field); \ - WARN_ONCE(fortify_memcpy_chk(__fortify_size, __p_size, \ + fortify_warn_once(fortify_memcpy_chk(__fortify_size, __p_size, \ __q_size, __p_size_field, \ - __q_size_field, #op), \ + __q_size_field, FORTIFY_FUNC_ ##op), \ #op ": detected field-spanning write (size %zu) of single %s (size %zu)\n", \ __fortify_size, \ "field \"" #p "\" at " FILE_LINE, \ @@ -652,7 +696,7 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memscan, FORTIFY_READ, p_size, size, NULL); return __real_memscan(p, c, size); } @@ -668,8 +712,10 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t if (__compiletime_lessthan(q_size, size)) __read_overflow2(); } - if (p_size < size || q_size < size) - fortify_panic(__func__); + if (p_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, p_size, size, INT_MIN); + else if (q_size < size) + fortify_panic(FORTIFY_FUNC_memcmp, FORTIFY_READ, q_size, size, INT_MIN); return __underlying_memcmp(p, q, size); } @@ -681,7 +727,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr, FORTIFY_READ, p_size, size, NULL); return __underlying_memchr(p, c, size); } @@ -693,22 +739,24 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_memchr_inv, FORTIFY_READ, p_size, size, NULL); return __real_memchr_inv(p, c, size); } -extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup) +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup_noprof) __realloc_size(2); -__FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) +__FORTIFY_INLINE void *kmemdup_noprof(const void * const POS0 p, size_t size, gfp_t gfp) { const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_kmemdup, FORTIFY_READ, p_size, size, + __real_kmemdup(p, 0, gfp)); return __real_kmemdup(p, size, gfp); } +#define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__)) /** * strcpy - Copy a string into another string buffer @@ -743,7 +791,7 @@ char *strcpy(char * const POS p, const char * const POS q) __write_overflow(); /* Run-time check for dynamic size overflow. 
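The "Overridden by KUnit tests" escape hatch works because fortify_panic() is now a macro receiving the failing function, the access direction, both sizes, and a per-call fallback value. A test harness could redefine it before the header is included, roughly along these lines (record_fortify_hit() is a hypothetical helper):

/* must be defined before fortify-string.h is pulled in */
#define fortify_panic(func, write, avail, size, retfail) do {		\
	record_fortify_hit(FORTIFY_REASON(func, write), avail, size);	\
	return retfail;	/* the fallback each call site specifies */	\
} while (0)

This is what lets the fortified strcpy()/memcpy() wrappers report an overflow and keep running under test instead of taking the kernel down.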
*/ if (p_size < size) - fortify_panic(__func__); + fortify_panic(FORTIFY_FUNC_strcpy, FORTIFY_WRITE, p_size, size, p); __underlying_memcpy(p, q, size); return p; } diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h index 223da48a6d18..94c4edd047e5 100644 --- a/include/linux/fpga/fpga-bridge.h +++ b/include/linux/fpga/fpga-bridge.h @@ -45,6 +45,7 @@ struct fpga_bridge_info { * @dev: FPGA bridge device * @mutex: enforces exclusive reference to bridge * @br_ops: pointer to struct of FPGA bridge ops + * @br_ops_owner: module containing the br_ops * @info: fpga image specific information * @node: FPGA bridge list node * @priv: low level driver private data */ @@ -54,6 +55,7 @@ struct fpga_bridge { struct device dev; struct mutex mutex; /* for exclusive reference to bridge */ const struct fpga_bridge_ops *br_ops; + struct module *br_ops_owner; struct fpga_image_info *info; struct list_head node; void *priv; @@ -79,10 +81,12 @@ int of_fpga_bridge_get_to_list(struct device_node *np, struct fpga_image_info *info, struct list_head *bridge_list); +#define fpga_bridge_register(parent, name, br_ops, priv) \ + __fpga_bridge_register(parent, name, br_ops, priv, THIS_MODULE) struct fpga_bridge * -fpga_bridge_register(struct device *parent, const char *name, - const struct fpga_bridge_ops *br_ops, - void *priv); +__fpga_bridge_register(struct device *parent, const char *name, + const struct fpga_bridge_ops *br_ops, void *priv, + struct module *owner); void fpga_bridge_unregister(struct fpga_bridge *br); #endif /* _LINUX_FPGA_BRIDGE_H */ diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 54f63459efd6..0d4fe068f3d8 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -201,6 +201,7 @@ struct fpga_manager_ops { * @state: state of fpga manager * @compat_id: FPGA manager id for compatibility check.
* @mops: pointer to struct of fpga manager ops + * @mops_owner: module containing the mops * @priv: low level driver private data */ struct fpga_manager { @@ -210,6 +211,7 @@ enum fpga_mgr_states state; struct fpga_compat_id *compat_id; const struct fpga_manager_ops *mops; + struct module *mops_owner; void *priv; }; @@ -230,18 +232,30 @@ struct fpga_manager *fpga_mgr_get(struct device *dev); void fpga_mgr_put(struct fpga_manager *mgr); +#define fpga_mgr_register_full(parent, info) \ + __fpga_mgr_register_full(parent, info, THIS_MODULE) struct fpga_manager * -fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info); +__fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info, + struct module *owner); +#define fpga_mgr_register(parent, name, mops, priv) \ + __fpga_mgr_register(parent, name, mops, priv, THIS_MODULE) struct fpga_manager * -fpga_mgr_register(struct device *parent, const char *name, - const struct fpga_manager_ops *mops, void *priv); +__fpga_mgr_register(struct device *parent, const char *name, + const struct fpga_manager_ops *mops, void *priv, struct module *owner); + void fpga_mgr_unregister(struct fpga_manager *mgr); +#define devm_fpga_mgr_register_full(parent, info) \ + __devm_fpga_mgr_register_full(parent, info, THIS_MODULE) struct fpga_manager * -devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info); +__devm_fpga_mgr_register_full(struct device *parent, const struct fpga_manager_info *info, + struct module *owner); +#define devm_fpga_mgr_register(parent, name, mops, priv) \ + __devm_fpga_mgr_register(parent, name, mops, priv, THIS_MODULE) struct fpga_manager * -devm_fpga_mgr_register(struct device *parent, const char *name, - const struct fpga_manager_ops *mops, void *priv); +__devm_fpga_mgr_register(struct device *parent, const char *name, + const struct fpga_manager_ops *mops, void *priv, + struct module *owner); #endif /*_LINUX_FPGA_MGR_H */ diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h index 9d4d32909340..5fbc05fe70a6 100644 --- a/include/linux/fpga/fpga-region.h +++ b/include/linux/fpga/fpga-region.h @@ -36,6 +36,7 @@ struct fpga_region_info { * @mgr: FPGA manager * @info: FPGA image info * @compat_id: FPGA region id for compatibility check.
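Callers of the FPGA registration API stay source-compatible; the new wrapper macros only capture THIS_MODULE so the core can pin the module that owns the ops for as long as they may be invoked. An illustrative probe (driver names hypothetical):

static int example_probe(struct platform_device *pdev)
{
	struct fpga_manager *mgr;

	/* expands to __fpga_mgr_register(..., THIS_MODULE) */
	mgr = fpga_mgr_register(&pdev->dev, "example fpga manager",
				&example_mops, NULL);
	if (IS_ERR(mgr))
		return PTR_ERR(mgr);

	platform_set_drvdata(pdev, mgr);
	return 0;
}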
+ * @ops_owner: module containing the get_bridges function * @priv: private data * @get_bridges: optional function to get bridges to a list */ @@ -46,6 +47,7 @@ struct fpga_region { struct fpga_manager *mgr; struct fpga_image_info *info; struct fpga_compat_id *compat_id; + struct module *ops_owner; void *priv; int (*get_bridges)(struct fpga_region *region); }; @@ -58,12 +60,17 @@ fpga_region_class_find(struct device *start, const void *data, int fpga_region_program_fpga(struct fpga_region *region); +#define fpga_region_register_full(parent, info) \ + __fpga_region_register_full(parent, info, THIS_MODULE) struct fpga_region * -fpga_region_register_full(struct device *parent, const struct fpga_region_info *info); +__fpga_region_register_full(struct device *parent, const struct fpga_region_info *info, + struct module *owner); +#define fpga_region_register(parent, mgr, get_bridges) \ + __fpga_region_register(parent, mgr, get_bridges, THIS_MODULE) struct fpga_region * -fpga_region_register(struct device *parent, struct fpga_manager *mgr, - int (*get_bridges)(struct fpga_region *)); +__fpga_region_register(struct device *parent, struct fpga_manager *mgr, + int (*get_bridges)(struct fpga_region *), struct module *owner); void fpga_region_unregister(struct fpga_region *region); #endif /* _FPGA_REGION_H */ diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index 3e03758151f4..f39869588117 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -7,6 +7,16 @@ #include <linux/ftrace.h> #include <linux/rethook.h> +struct fprobe; + +typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip, + unsigned long ret_ip, struct pt_regs *regs, + void *entry_data); + +typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip, + unsigned long ret_ip, struct pt_regs *regs, + void *entry_data); + /** * struct fprobe - ftrace based probe. * @ops: The ftrace_ops. @@ -34,12 +44,8 @@ struct fprobe { size_t entry_data_size; int nr_maxactive; - int (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, - void *entry_data); - void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, - void *entry_data); + fprobe_entry_cb entry_handler; + fprobe_exit_cb exit_handler; }; /* This fprobe is soft-disabled. */ diff --git a/include/linux/fpu.h b/include/linux/fpu.h new file mode 100644 index 000000000000..2fb63e22913b --- /dev/null +++ b/include/linux/fpu.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_FPU_H +#define _LINUX_FPU_H + +#ifdef _LINUX_FPU_COMPILATION_UNIT +#error FP code must be compiled separately. See Documentation/core-api/floating-point.rst. 
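With the fprobe callback typedefs above, probe definitions read as before but the handler types now have names. An illustrative probe (identifiers hypothetical):

static int example_entry(struct fprobe *fp, unsigned long entry_ip,
			 unsigned long ret_ip, struct pt_regs *regs,
			 void *entry_data)
{
	pr_debug("entered %pS\n", (void *)entry_ip);
	return 0;	/* non-zero would suppress the exit handler */
}

static struct fprobe example_fp = {
	.entry_handler = example_entry,	/* type: fprobe_entry_cb */
};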
+#endif + +#include <asm/fpu.h> + +#endif diff --git a/include/linux/framer/framer-provider.h b/include/linux/framer/framer-provider.h index 782cd5fc83d5..9724d4b44b9c 100644 --- a/include/linux/framer/framer-provider.h +++ b/include/linux/framer/framer-provider.h @@ -83,7 +83,6 @@ struct framer_ops { /** * struct framer_provider - represents the framer provider * @dev: framer provider device - * @children: can be used to override the default (dev->of_node) child node * @owner: the module owner having of_xlate * @list: to maintain a linked list of framer providers * @of_xlate: function pointer to obtain framer instance from framer pointer @@ -93,7 +92,7 @@ struct framer_provider { struct module *owner; struct list_head list; struct framer * (*of_xlate)(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); }; static inline void framer_set_drvdata(struct framer *framer, void *data) @@ -118,19 +117,19 @@ struct framer *devm_framer_create(struct device *dev, struct device_node *node, const struct framer_ops *ops); struct framer *framer_provider_simple_of_xlate(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); struct framer_provider * __framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void framer_provider_of_unregister(struct framer_provider *framer_provider); struct framer_provider * __devm_framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void framer_notify_status_change(struct framer *framer); @@ -154,7 +153,7 @@ static inline struct framer *devm_framer_create(struct device *dev, struct devic } static inline struct framer *framer_provider_simple_of_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { return ERR_PTR(-ENOSYS); } @@ -162,7 +161,7 @@ static inline struct framer *framer_provider_simple_of_xlate(struct device *dev, static inline struct framer_provider * __framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } @@ -174,7 +173,7 @@ void framer_provider_of_unregister(struct framer_provider *framer_provider) static inline struct framer_provider * __devm_framer_provider_of_register(struct device *dev, struct module *owner, struct framer *(*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h index 9a9b88962c29..2b85fe9e7f9a 100644 --- a/include/linux/framer/framer.h +++ b/include/linux/framer/framer.h @@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer, return -ENOSYS; } -struct framer *framer_get(struct device *dev, const char *con_id) +static inline struct framer *framer_get(struct device *dev, const char *con_id) { return ERR_PTR(-ENOSYS); } -void framer_put(struct device *dev, struct framer *framer) +static inline void framer_put(struct device *dev, struct framer *framer) { } diff --git a/include/linux/fs.h b/include/linux/fs.h index 1fbc72c5f112..0283cf366c2a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,8 @@ 
#include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#include <linux/maple_tree.h> +#include <linux/rw_hint.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -71,6 +73,8 @@ struct fscrypt_inode_info; struct fscrypt_operations; struct fsverity_info; struct fsverity_operations; +struct fsnotify_mark_connector; +struct fsnotify_sb_info; struct fs_context; struct fs_parameter_spec; struct fileattr; @@ -108,21 +112,26 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ /* file is open for reading */ -#define FMODE_READ ((__force fmode_t)0x1) +#define FMODE_READ ((__force fmode_t)(1 << 0)) /* file is open for writing */ -#define FMODE_WRITE ((__force fmode_t)0x2) +#define FMODE_WRITE ((__force fmode_t)(1 << 1)) /* file is seekable */ -#define FMODE_LSEEK ((__force fmode_t)0x4) +#define FMODE_LSEEK ((__force fmode_t)(1 << 2)) /* file can be accessed using pread */ -#define FMODE_PREAD ((__force fmode_t)0x8) +#define FMODE_PREAD ((__force fmode_t)(1 << 3)) /* file can be accessed using pwrite */ -#define FMODE_PWRITE ((__force fmode_t)0x10) +#define FMODE_PWRITE ((__force fmode_t)(1 << 4)) /* File is opened for execution with sys_execve / sys_uselib */ -#define FMODE_EXEC ((__force fmode_t)0x20) +#define FMODE_EXEC ((__force fmode_t)(1 << 5)) +/* File writes are restricted (block device specific) */ +#define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6)) + +/* FMODE_* bits 7 to 8 */ + /* 32bit hashes as llseek() offset (for directories) */ -#define FMODE_32BITHASH ((__force fmode_t)0x200) +#define FMODE_32BITHASH ((__force fmode_t)(1 << 9)) /* 64bit hashes as llseek() offset (for directories) */ -#define FMODE_64BITHASH ((__force fmode_t)0x400) +#define FMODE_64BITHASH ((__force fmode_t)(1 << 10)) /* * Don't update ctime and mtime. @@ -130,60 +139,53 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * Currently a special hack for the XFS open_by_handle ioctl, but we'll * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. */ -#define FMODE_NOCMTIME ((__force fmode_t)0x800) +#define FMODE_NOCMTIME ((__force fmode_t)(1 << 11)) /* Expect random access pattern */ -#define FMODE_RANDOM ((__force fmode_t)0x1000) +#define FMODE_RANDOM ((__force fmode_t)(1 << 12)) /* File is huge (eg. 
/dev/mem): treat loff_t as unsigned */ -#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) +#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13)) /* File is opened with O_PATH; almost nothing can be done with it */ -#define FMODE_PATH ((__force fmode_t)0x4000) +#define FMODE_PATH ((__force fmode_t)(1 << 14)) /* File needs atomic accesses to f_pos */ -#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +#define FMODE_ATOMIC_POS ((__force fmode_t)(1 << 15)) /* Write access to underlying fs */ -#define FMODE_WRITER ((__force fmode_t)0x10000) +#define FMODE_WRITER ((__force fmode_t)(1 << 16)) /* Has read method(s) */ -#define FMODE_CAN_READ ((__force fmode_t)0x20000) +#define FMODE_CAN_READ ((__force fmode_t)(1 << 17)) /* Has write method(s) */ -#define FMODE_CAN_WRITE ((__force fmode_t)0x40000) +#define FMODE_CAN_WRITE ((__force fmode_t)(1 << 18)) -#define FMODE_OPENED ((__force fmode_t)0x80000) -#define FMODE_CREATED ((__force fmode_t)0x100000) +#define FMODE_OPENED ((__force fmode_t)(1 << 19)) +#define FMODE_CREATED ((__force fmode_t)(1 << 20)) /* File is stream-like */ -#define FMODE_STREAM ((__force fmode_t)0x200000) +#define FMODE_STREAM ((__force fmode_t)(1 << 21)) /* File supports DIRECT IO */ -#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) +#define FMODE_CAN_ODIRECT ((__force fmode_t)(1 << 22)) -#define FMODE_NOREUSE ((__force fmode_t)0x800000) +#define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) -/* File supports non-exclusive O_DIRECT writes from multiple threads */ -#define FMODE_DIO_PARALLEL_WRITE ((__force fmode_t)0x1000000) +/* FMODE_* bit 24 */ /* File is embedded in backing_file object */ -#define FMODE_BACKING ((__force fmode_t)0x2000000) +#define FMODE_BACKING ((__force fmode_t)(1 << 25)) /* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) +#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ -#define FMODE_NOWAIT ((__force fmode_t)0x8000000) +#define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) /* File represents mount that needs unmounting */ -#define FMODE_NEED_UNMOUNT ((__force fmode_t)0x10000000) +#define FMODE_NEED_UNMOUNT ((__force fmode_t)(1 << 28)) /* File does not contribute to nr_files count */ -#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) - -/* File supports async buffered reads */ -#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) - -/* File supports async nowait buffered writes */ -#define FMODE_BUF_WASYNC ((__force fmode_t)0x80000000) +#define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) /* * Attribute flags. These should be or-ed together to figure out what @@ -309,19 +311,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. 
- */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -484,10 +473,10 @@ struct address_space { pgoff_t writeback_index; const struct address_space_operations *a_ops; unsigned long flags; - struct rw_semaphore i_mmap_rwsem; errseq_t wb_err; spinlock_t i_private_lock; struct list_head i_private_list; + struct rw_semaphore i_mmap_rwsem; void * i_private_data; } __attribute__((aligned(sizeof(long)))) __randomize_layout; /* @@ -631,8 +620,6 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 -struct fsnotify_mark_connector; - /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -679,7 +666,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED @@ -909,7 +896,8 @@ static inline loff_t i_size_read(const struct inode *inode) preempt_enable(); return i_size; #else - return inode->i_size; + /* Pairs with smp_store_release() in i_size_write() */ + return smp_load_acquire(&inode->i_size); #endif } @@ -931,7 +919,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) inode->i_size = i_size; preempt_enable(); #else - inode->i_size = i_size; + /* + * Pairs with smp_load_acquire() in i_size_read() to ensure + * changes related to inode size (such as page contents) are + * visible before we see the changed inode size. + */ + smp_store_release(&inode->i_size, i_size); #endif } @@ -1038,12 +1031,13 @@ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ - unsigned char f_handle[]; + unsigned char f_handle[] __counted_by(handle_bytes); }; static inline struct file *get_file(struct file *f) { - atomic_long_inc(&f->f_count); + long prior = atomic_long_fetch_inc_relaxed(&f->f_count); + WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n"); return f; } @@ -1066,6 +1060,7 @@ struct file *get_file_active(struct file **f); typedef void *fl_owner_t; struct file_lock; +struct file_lease; /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX @@ -1080,9 +1075,20 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +/* + * file_dentry() is a relic from the days that overlayfs was using files with a + * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs. + * In those days, file_dentry() was needed to get the underlying fs dentry that + * matches f_inode. + * Files with "fake" path should not exist nowadays, so use an assertion to make + * sure that file_dentry() was not papering over filesystem bugs. 
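The i_size_read()/i_size_write() change above upgrades the plain load and store on the lockless path (e.g. 64-bit kernels) to an acquire/release pair. A sketch of the ordering this buys (writer function hypothetical):

/* writer: publish data before the size */
static void example_extend(struct inode *inode, struct folio *folio,
			   loff_t new_size)
{
	folio_mark_uptodate(folio);	/* contents become visible first... */
	i_size_write(inode, new_size);	/* ...then the size (store-release) */
}

A reader that does sz = i_size_read(inode) (a load-acquire) and then reads pages up to sz is guaranteed to observe the contents published before that size change.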
+ */ static inline struct dentry *file_dentry(const struct file *file) { - return d_real(file->f_path.dentry, file_inode(file)); + struct dentry *dentry = file->f_path.dentry; + + WARN_ON_ONCE(d_inode(dentry) != file_inode(file)); + return dentry; } struct fasync_struct { @@ -1168,7 +1174,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 -#define SB_I_EVM_UNSUPPORTED 0x00000080 +#define SB_I_EVM_HMAC_UNSUPPORTED 0x00000080 #define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ #define SB_I_PERSB_BDI 0x00000200 /* has a per-sb bdi */ @@ -1230,8 +1236,8 @@ struct super_block { #endif struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ - struct block_device *s_bdev; - struct bdev_handle *s_bdev_handle; + struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */ + struct file *s_bdev_file; struct backing_dev_info *s_bdi; struct mtd_info *s_mtd; struct hlist_node s_instances; @@ -1242,7 +1248,7 @@ struct super_block { /* * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and - * s_fsnotify_marks together for cache efficiency. They are frequently + * s_fsnotify_info together for cache efficiency. They are frequently * accessed and rarely modified. */ void *s_fs_info; /* Filesystem private info */ @@ -1254,11 +1260,25 @@ struct super_block { time64_t s_time_max; #ifdef CONFIG_FSNOTIFY __u32 s_fsnotify_mask; - struct fsnotify_mark_connector __rcu *s_fsnotify_marks; + struct fsnotify_sb_info *s_fsnotify_info; #endif + /* + * q: why are s_id and s_sysfs_name not the same? both are human + * readable strings that identify the filesystem + * a: s_id is allowed to change at runtime; it's used in log messages, + * and we want it to change when a device starts out as a single device + * (s_id is the dev name) but then a device is hot added and we have to + * switch to identifying it by UUID + * but s_sysfs_name is a handle for programmatic access, and can't + * change at runtime + */ char s_id[32]; /* Informational name */ uuid_t s_uuid; /* UUID */ + u8 s_uuid_len; /* Default 16, possibly smaller for weird filesystems */ + + /* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */ + char s_sysfs_name[UUID_STRING_LEN + 1]; unsigned int s_max_links; @@ -1281,12 +1301,6 @@ struct super_block { /* Number of inodes with nlink == 0 but still referenced */ atomic_long_t s_remove_count; - /* - * Number of inode/mount/sb objects that are being watched, note that - * inodes objects are currently double-accounted.
- */ - atomic_long_t s_fsnotify_connectors; - /* Read-only state of the superblock is being changed */ int s_readonly_remount; @@ -1885,7 +1899,7 @@ struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, umode_t mode, int open_flag, const struct cred *cred); struct file *kernel_file_open(const struct path *path, int flags, - struct inode *inode, const struct cred *cred); + const struct cred *cred); int vfs_mkobj(struct dentry *, umode_t, int (*f)(struct dentry *, umode_t, void *), @@ -1980,8 +1994,11 @@ struct iov_iter; struct io_uring_cmd; struct offset_ctx; +typedef unsigned int __bitwise fop_flags_t; + struct file_operations { struct module *owner; + fop_flags_t fop_flags; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); @@ -1994,7 +2011,6 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); - unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); @@ -2007,7 +2023,7 @@ struct file_operations { ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); void (*splice_eof)(struct file *file); - int (*setlease)(struct file *, int, struct file_lock **, void **); + int (*setlease)(struct file *, int, struct file_lease **, void **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); void (*show_fdinfo)(struct seq_file *m, struct file *f); @@ -2025,6 +2041,17 @@ struct file_operations { unsigned int poll_flags); } __randomize_layout; +/* Supports async buffered reads */ +#define FOP_BUFFER_RASYNC ((__force fop_flags_t)(1 << 0)) +/* Supports async buffered writes */ +#define FOP_BUFFER_WASYNC ((__force fop_flags_t)(1 << 1)) +/* Supports synchronous page faults for mappings */ +#define FOP_MMAP_SYNC ((__force fop_flags_t)(1 << 2)) +/* Supports non-exclusive O_DIRECT writes from multiple threads */ +#define FOP_DIO_PARALLEL_WRITE ((__force fop_flags_t)(1 << 3)) +/* Contains huge pages */ +#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4)) + /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, int (*) (struct file *, struct dir_context *)); @@ -2075,18 +2102,6 @@ struct inode_operations { struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; -static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio, - struct iov_iter *iter) -{ - return file->f_op->read_iter(kio, iter); -} - -static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio, - struct iov_iter *iter) -{ - return file->f_op->write_iter(kio, iter); -} - static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { return file->f_op->mmap(file, vma); @@ -2096,6 +2111,7 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, loff_t, size_t, unsigned int); +int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write); int 
__generic_remap_file_range_prep(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t *len, unsigned int remap_flags, @@ -2158,7 +2174,7 @@ struct super_operations { #ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); - struct dquot **(*get_dquots)(struct inode *); + struct dquot __rcu **(*get_dquots)(struct inode *); #endif long (*nr_cached_objects)(struct super_block *, struct shrink_control *); @@ -2230,7 +2246,13 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) + +#ifdef CONFIG_SWAP #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) +#else +#define IS_SWAPFILE(inode) ((void)(inode), 0U) +#endif + #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) #define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) @@ -2531,6 +2553,44 @@ extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); +static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len) +{ + if (WARN_ON(len > sizeof(sb->s_uuid))) + len = sizeof(sb->s_uuid); + sb->s_uuid_len = len; + memcpy(&sb->s_uuid, uuid, len); +} + +/* set sb sysfs name based on sb->s_bdev */ +static inline void super_set_sysfs_name_bdev(struct super_block *sb) +{ + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev); +} + +/* set sb sysfs name based on sb->s_uuid */ +static inline void super_set_sysfs_name_uuid(struct super_block *sb) +{ + WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid)); + snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b); +} + +/* set sb sysfs name based on sb->s_id */ +static inline void super_set_sysfs_name_id(struct super_block *sb) +{ + strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name)); +} + +/* try to use something standard before you use this */ +__printf(2, 3) +static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args); + va_end(args); +} + extern int current_umask(void); extern void ihold(struct inode * inode); @@ -2927,6 +2987,17 @@ extern bool path_is_under(const struct path *, const struct path *); extern char *file_path(struct file *, char *, int); +/** + * is_dot_dotdot - returns true only if @name is "." or ".." 
+ * @name: file name to check + * @len: length of file name, in bytes + */ +static inline bool is_dot_dotdot(const char *name, size_t len) +{ + return len && unlikely(name[0] == '.') && + (len == 1 || (len == 2 && name[1] == '.')); +} + #include <linux/err.h> /* needed for stackable file system support */ @@ -3004,6 +3075,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); +extern int file_remove_privs_flags(struct file *file, unsigned int flags); extern int file_remove_privs(struct file *); int setattr_should_drop_sgid(struct mnt_idmap *idmap, const struct inode *inode); @@ -3012,11 +3084,7 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap, * This must be used for allocating filesystems specific inodes to set * up the inode reclaim context correctly. */ -static inline void * -alloc_inode_sb(struct super_block *sb, struct kmem_cache *cache, gfp_t gfp) -{ - return kmem_cache_alloc_lru(cache, &sb->s_inode_lru, gfp); -} +#define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp) extern void __insert_inode_hash(struct inode *, unsigned long hashval); static inline void insert_inode_hash(struct inode *inode) @@ -3237,7 +3305,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); -extern int simple_nosetlease(struct file *, int, struct file_lock **, void **); +extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); @@ -3259,13 +3327,16 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); struct offset_ctx { - struct xarray xa; - u32 next_offset; + struct maple_tree mt; + unsigned long next_offset; }; void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); +int simple_offset_empty(struct dentry *dentry); +int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -3279,7 +3350,16 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); -extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); +extern void generic_set_sb_d_ops(struct super_block *sb); + +static inline bool sb_has_encoding(const struct super_block *sb) +{ +#if IS_ENABLED(CONFIG_UNICODE) + return !!sb->s_encoding; +#else + return false; +#endif +} int may_setattr(struct mnt_idmap *idmap, struct inode *inode, unsigned int ia_valid); @@ -3334,6 +3414,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; + if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND))) + return -EINVAL; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) @@ -3344,6 +3426,12 @@ static 
inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; + if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) { + if (IS_APPEND(file_inode(ki->ki_filp))) + return -EPERM; + ki->ki_flags &= ~IOCB_APPEND; + } + ki->ki_flags |= kiocb_flags; return 0; } diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 01542c4b87a2..d3350979115f 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -132,4 +132,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +/* String parameter that allows empty argument */ +#define fsparam_string_empty(NAME, OPT) \ + __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) + #endif /* _LINUX_FS_PARSER_H */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 6e8562cbcc43..9de27643607f 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -172,9 +172,12 @@ extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); -extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *, - loff_t, size_t, loff_t, netfs_io_terminated_t, void *, - bool); +void __fscache_write_to_cache(struct fscache_cookie *cookie, + struct address_space *mapping, + loff_t start, size_t len, loff_t i_size, + netfs_io_terminated_t term_func, + void *term_func_priv, + bool using_pgpriv2, bool cond); extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); /** @@ -597,7 +600,8 @@ static inline void fscache_clear_page_bits(struct address_space *mapping, * @i_size: The new size of the inode * @term_func: The function to call upon completion * @term_func_priv: The private data for @term_func - * @caching: If PG_fscache has been set + * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write + * @caching: If we actually want to do the caching * * Helper function for a netfs to write dirty data from an inode into the cache * object that's backing it. @@ -608,19 +612,21 @@ static inline void fscache_clear_page_bits(struct address_space *mapping, * marked with PG_fscache. * * If given, @term_func will be called upon completion and supplied with - * @term_func_priv. Note that the PG_fscache flags will have been cleared by - * this point, so the netfs must retain its own pin on the mapping. + * @term_func_priv. Note that if @using_pgpriv2 is set, the PG_private_2 flags + * will have been cleared by this point, so the netfs must retain its own pin + * on the mapping. 
*/ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, - bool caching) + bool using_pgpriv2, bool caching) { if (caching) __fscache_write_to_cache(cookie, mapping, start, len, i_size, - term_func, term_func_priv, caching); + term_func, term_func_priv, + using_pgpriv2, caching); else if (term_func) term_func(term_func_priv, -ENOBUFS, false); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 12f9e455d569..772f822dc6b8 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -192,6 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); + static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { @@ -221,15 +223,29 @@ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) } /* - * When d_splice_alias() moves a directory's no-key alias to its plaintext alias - * as a result of the encryption key being added, DCACHE_NOKEY_NAME must be - * cleared. Note that we don't have to support arbitrary moves of this flag - * because fscrypt doesn't allow no-key names to be the source or target of a - * rename(). + * When d_splice_alias() moves a directory's no-key alias to its + * plaintext alias as a result of the encryption key being added, + * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity + * to disable d_revalidate. Note that we don't have to support the + * inverse operation because fscrypt doesn't allow no-key names to be + * the source or target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { - dentry->d_flags &= ~DCACHE_NOKEY_NAME; + /* + * VFS calls fscrypt_handle_d_move even for non-fscrypt + * filesystems. + */ + if (dentry->d_flags & DCACHE_NOKEY_NAME) { + dentry->d_flags &= ~DCACHE_NOKEY_NAME; + + /* + * Other filesystem features might be handling dentry + * revalidation, in which case it cannot be disabled. + */ + if (dentry->d_op->d_revalidate == fscrypt_d_revalidate) + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + } } /** @@ -261,6 +277,35 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return dentry->d_flags & DCACHE_NOKEY_NAME; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ + /* + * This code tries to only take ->d_lock when necessary to write + * to ->d_flags. We shouldn't be peeking on d_flags for + * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case + * there is a race, the worst that can happen is that we fail to + * unset DCACHE_OP_REVALIDATE and pay the cost of an extra + * d_revalidate. + */ + if (is_nokey_name) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_NOKEY_NAME; + spin_unlock(&dentry->d_lock); + } else if (dentry->d_flags & DCACHE_OP_REVALIDATE && + dentry->d_op->d_revalidate == fscrypt_d_revalidate) { + /* + * Unencrypted dentries and encrypted dentries where the + * key is available are always valid from the fscrypt + * perspective. Avoid the cost of calling + * fscrypt_d_revalidate unnecessarily.
+ */ + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_OP_REVALIDATE; + spin_unlock(&dentry->d_lock); + } +} + /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); @@ -368,7 +413,6 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); @@ -425,6 +469,11 @@ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) return false; } +static inline void fscrypt_prepare_dentry(struct dentry *dentry, + bool is_nokey_name) +{ +} + /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { @@ -982,6 +1031,9 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; + + fscrypt_prepare_dentry(dentry, false); + return 0; } diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 8300a5286988..4da80e92f804 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -17,6 +17,25 @@ #include <linux/slab.h> #include <linux/bug.h> +/* Are there any inode/mount/sb objects watched with priority prio or above? */ +static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, + int prio) +{ + struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + + /* Were any marks ever added to any object on this sb? */ + if (!sbinfo) + return false; + + return atomic_long_read(&sbinfo->watched_objects[prio]); +} + +/* Are there any inode/mount/sb objects that are being watched at all? */ +static inline bool fsnotify_sb_has_watchers(struct super_block *sb) +{ + return fsnotify_sb_has_priority_watchers(sb, 0); +} + /* * Notify this @dir inode about a change in a child directory entry. 
* The directory entry may have turned positive or negative or its inode may @@ -30,7 +49,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie) { - if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(dir->i_sb)) return 0; return fsnotify(mask, data, data_type, dir, name, NULL, cookie); @@ -44,7 +63,7 @@ static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry, static inline void fsnotify_inode(struct inode *inode, __u32 mask) { - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return; if (S_ISDIR(inode->i_mode)) @@ -59,7 +78,7 @@ static inline int fsnotify_parent(struct dentry *dentry, __u32 mask, { struct inode *inode = d_inode(dentry); - if (atomic_long_read(&inode->i_sb->s_fsnotify_connectors) == 0) + if (!fsnotify_sb_has_watchers(inode->i_sb)) return 0; if (S_ISDIR(inode->i_mode)) { @@ -97,6 +116,12 @@ static inline int fsnotify_file(struct file *file, __u32 mask) return 0; path = &file->f_path; + /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ + if (mask & ALL_FSNOTIFY_PERM_EVENTS && + !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, + FSNOTIFY_PRIO_CONTENT)) + return 0; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7f63be5ca0f1..4dd6143db271 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -31,8 +31,8 @@ #define FS_ACCESS 0x00000001 /* File was accessed */ #define FS_MODIFY 0x00000002 /* File was modified */ #define FS_ATTRIB 0x00000004 /* Metadata changed */ -#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ -#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ #define FS_OPEN 0x00000020 /* File was opened */ #define FS_MOVED_FROM 0x00000040 /* File was moved from X */ #define FS_MOVED_TO 0x00000080 /* File was moved to Y */ @@ -177,6 +177,17 @@ struct fsnotify_event { }; /* + * fsnotify group priorities. + * Events are sent in order from highest priority to lowest priority. + */ +enum fsnotify_group_prio { + FSNOTIFY_PRIO_NORMAL = 0, /* normal notifiers, no permissions */ + FSNOTIFY_PRIO_CONTENT, /* fanotify permission events */ + FSNOTIFY_PRIO_PRE_CONTENT, /* fanotify pre-content events */ + __FSNOTIFY_PRIO_NUM +}; + +/* * A group is a "thing" that wants to receive notification about filesystem * events. The mask holds the subset of event types this group cares about. * refcnt on a group is up to the implementor and at any moment if it goes 0 @@ -201,14 +212,7 @@ struct fsnotify_group { wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ unsigned int q_len; /* events on the queue */ unsigned int max_events; /* maximum events allowed on the list */ - /* - * Valid fsnotify group priorities. Events are send in order from highest - * priority to lowest priority. We default to the lowest priority. 
- */ - #define FS_PRIO_0 0 /* normal notifiers, no permissions */ - #define FS_PRIO_1 1 /* fanotify content based access control */ - #define FS_PRIO_2 2 /* fanotify pre-content access */ - unsigned int priority; + enum fsnotify_group_prio priority; /* priority for sending events */ bool shutdown; /* group is being shut down, don't queue more events */ #define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */ @@ -457,13 +461,6 @@ FSNOTIFY_ITER_FUNCS(sb, SB) type++) /* - * fsnotify_connp_t is what we embed in objects which connector can be attached - * to. fsnotify_connp_t * is how we refer from connector back to object. - */ -struct fsnotify_mark_connector; -typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; - -/* * Inode/vfsmount/sb point to this structure which tracks all marks attached to * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this * structure. We destroy this structure when there are no more marks attached @@ -471,12 +468,14 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t; */ struct fsnotify_mark_connector { spinlock_t lock; - unsigned short type; /* Type of object [lock] */ + unsigned char type; /* Type of object [lock] */ + unsigned char prio; /* Highest priority group */ +#define FSNOTIFY_CONN_FLAG_IS_WATCHED 0x01 #define FSNOTIFY_CONN_FLAG_HAS_IREF 0x02 unsigned short flags; /* flags [lock] */ union { /* Object pointer [lock] */ - fsnotify_connp_t *obj; + void *obj; /* Used listing heads to free after srcu period expires */ struct fsnotify_mark_connector *destroy_next; }; @@ -484,6 +483,37 @@ struct fsnotify_mark_connector { }; /* + * Container for per-sb fsnotify state (sb marks and more). + * Attached lazily on first marked object on the sb and freed when killing sb. + */ +struct fsnotify_sb_info { + struct fsnotify_mark_connector __rcu *sb_marks; + /* + * Number of inode/mount/sb objects that are being watched in this sb. + * Note that inode objects are currently double-accounted. + * + * The value in watched_objects[prio] is the number of objects that are + * watched by groups of priority >= prio, so watched_objects[0] is the + * total number of watched objects in this sb. + */ + atomic_long_t watched_objects[__FSNOTIFY_PRIO_NUM]; +}; + +static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb) +{ +#ifdef CONFIG_FSNOTIFY + return READ_ONCE(sb->s_fsnotify_info); +#else + return NULL; +#endif +} + +static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb) +{ + return &fsnotify_sb_info(sb)->watched_objects[0]; +} + +/* + * A mark is simply an object attached to an in core inode which allows an + * fsnotify listener to indicate they are either no longer interested in events + * of a type matching mask or only interested in those events.
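To make the cumulative counter semantics above concrete, here is a minimal sketch (not part of the commit; the helper name is hypothetical) of how watching an object from a group of a given priority would be accounted, matching the "watched by groups of priority >= prio" rule documented for watched_objects:

	/* Illustrative only: a group of priority P counts in watched_objects[0..P]. */
	static void example_account_watched_object(struct fsnotify_sb_info *sbinfo,
						   enum fsnotify_group_prio prio)
	{
		int p;

		for (p = 0; p <= prio; p++)
			atomic_long_inc(&sbinfo->watched_objects[p]);
	}

Under this accounting, fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT) from the fsnotify.h hunk above reduces to a single atomic_long_read(), which is what lets permission events be skipped cheaply when no content-priority group is watching anything on the sb.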
@@ -546,6 +576,7 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) @@ -758,30 +789,35 @@ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp, - struct fsnotify_group *group); +struct fsnotify_mark *fsnotify_find_mark(void *obj, unsigned int obj_type, + struct fsnotify_group *group); /* attach the mark to the object */ -extern int fsnotify_add_mark(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, unsigned int obj_type, - int add_flags); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - fsnotify_connp_t *connp, - unsigned int obj_type, int add_flags); +int fsnotify_add_mark(struct fsnotify_mark *mark, void *obj, + unsigned int obj_type, int add_flags); +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, void *obj, + unsigned int obj_type, int add_flags); /* attach the mark to the inode */ static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { - return fsnotify_add_mark(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags); + return fsnotify_add_mark(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, + add_flags); } static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark, struct inode *inode, int add_flags) { - return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks, - FSNOTIFY_OBJ_TYPE_INODE, add_flags); + return fsnotify_add_mark_locked(mark, inode, FSNOTIFY_OBJ_TYPE_INODE, + add_flags); +} + +static inline struct fsnotify_mark *fsnotify_find_inode_mark( + struct inode *inode, + struct fsnotify_group *group) +{ + return fsnotify_find_mark(inode, FSNOTIFY_OBJ_TYPE_INODE, group); } /* given a group and a mark, flag mark to be freed when all references are dropped */ @@ -845,6 +881,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void fsnotify_sb_free(struct super_block *sb) +{} + static inline void fsnotify_update_flags(struct dentry *dentry) {} diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e8921871ef9a..800995c425e0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -83,7 +83,6 @@ static inline void early_trace_init(void) { } struct module; struct ftrace_hash; -struct ftrace_direct_func; #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ defined(CONFIG_DYNAMIC_FTRACE) @@ -414,7 +413,6 @@ struct ftrace_func_entry { }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -extern int ftrace_direct_func_count; unsigned long ftrace_find_rec_direct(unsigned long ip); int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr); int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, @@ -426,7 +424,6 @@ void ftrace_stub_direct_tramp(void); #else struct ftrace_ops; -# define ftrace_direct_func_count 0 static inline unsigned long ftrace_find_rec_direct(unsigned long ip) { return 0; @@ -822,7 +819,8 @@ static inline 
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); #if defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS) || \ - defined(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) + defined(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) || \ + defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) /** * ftrace_modify_call - convert from one addr to another (no nop) * @rec: the call site record (e.g. mcount/fentry) @@ -1151,7 +1149,9 @@ static inline void unpause_graph_tracing(void) { } #ifdef CONFIG_TRACING enum ftrace_dump_mode; -extern enum ftrace_dump_mode ftrace_dump_on_oops; +#define MAX_TRACER_SIZE 100 +extern char ftrace_dump_on_oops[]; +extern int ftrace_dump_on_oops_enabled(void); extern int tracepoint_printk; extern void disable_trace_on_warning(void); diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h index 95421860397a..3ff4c277296f 100644 --- a/include/linux/fw_table.h +++ b/include/linux/fw_table.h @@ -40,12 +40,14 @@ union acpi_subtable_headers { int acpi_parse_entries_array(char *id, unsigned long table_size, union fw_table_header *table_header, + unsigned long max_length, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries); int cdat_table_parse(enum acpi_cdat_type type, acpi_tbl_entry_handler_arg handler_arg, void *arg, - struct acpi_table_cdat *table_header); + struct acpi_table_cdat *table_header, + unsigned long length); /* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */ #if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS) diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 2a72f55d26eb..0d79070c5a70 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -9,10 +9,16 @@ #ifndef _LINUX_FWNODE_H_ #define _LINUX_FWNODE_H_ -#include <linux/types.h> -#include <linux/list.h> #include <linux/bits.h> #include <linux/err.h> +#include <linux/list.h> +#include <linux/types.h> + +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; struct fwnode_operations; struct device; @@ -53,8 +59,10 @@ struct fwnode_handle { * fwnode link flags * * CYCLE: The fwnode link is part of a cycle. Don't defer probe. + * IGNORE: Completely ignore this link, even during cycle detection. 
*/ #define FWLINK_FLAG_CYCLE BIT(0) +#define FWLINK_FLAG_IGNORE BIT(1) struct fwnode_link { struct fwnode_handle *supplier; @@ -187,7 +195,6 @@ struct fwnode_operations { if (fwnode_has_op(fwnode, op)) \ (fwnode)->ops->op(fwnode, ## __VA_ARGS__); \ } while (false) -#define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) static inline void fwnode_init(struct fwnode_handle *fwnode, const struct fwnode_operations *ops) @@ -209,9 +216,10 @@ static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; } -extern bool fw_devlink_is_strict(void); -int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); +int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, + u8 flags); void fwnode_links_purge(struct fwnode_handle *fwnode); void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); +bool fw_devlink_is_strict(void); #endif diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h index 847413164738..f3512fddf3d7 100644 --- a/include/linux/generic-radix-tree.h +++ b/include/linux/generic-radix-tree.h @@ -5,7 +5,7 @@ * DOC: Generic radix trees/sparse arrays * * Very simple and minimalistic, supporting arbitrary size entries up to - * PAGE_SIZE. + * GENRADIX_NODE_SIZE. * * A genradix is defined with the type it will store, like so: * @@ -45,12 +45,15 @@ struct genradix_root; +#define GENRADIX_NODE_SHIFT 9 +#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT) + struct __genradix { struct genradix_root *root; }; /* - * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE: + * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE: */ #define __GENRADIX_INITIALIZER \ @@ -101,14 +104,14 @@ void __genradix_free(struct __genradix *); static inline size_t __idx_to_offset(size_t idx, size_t obj_size) { if (__builtin_constant_p(obj_size)) - BUILD_BUG_ON(obj_size > PAGE_SIZE); + BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE); else - BUG_ON(obj_size > PAGE_SIZE); + BUG_ON(obj_size > GENRADIX_NODE_SIZE); if (!is_power_of_2(obj_size)) { - size_t objs_per_page = PAGE_SIZE / obj_size; + size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size; - return (idx / objs_per_page) * PAGE_SIZE + + return (idx / objs_per_page) * GENRADIX_NODE_SIZE + (idx % objs_per_page) * obj_size; } else { return idx * obj_size; @@ -118,9 +121,9 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size) #define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) #define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) #define __genradix_objs_per_page(_radix) \ - (PAGE_SIZE / sizeof((_radix)->type[0])) + (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0])) #define __genradix_page_remainder(_radix) \ - (PAGE_SIZE % sizeof((_radix)->type[0])) + (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0])) #define __genradix_idx_to_offset(_radix, _idx) \ __idx_to_offset(_idx, __genradix_obj_size(_radix)) @@ -217,8 +220,8 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter, iter->offset += obj_size; if (!is_power_of_2(obj_size) && - (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) - iter->offset = round_up(iter->offset, PAGE_SIZE); + (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE) + iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE); iter->pos++; } @@ -235,8 +238,8 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter, return; } - if ((iter->offset & (PAGE_SIZE - 1)) == 0) - iter->offset -= PAGE_SIZE % 
obj_size; + if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0) + iter->offset -= GENRADIX_NODE_SIZE % obj_size; iter->offset -= obj_size; iter->pos--; @@ -263,7 +266,7 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter, genradix_for_each_from(_radix, _iter, _p, 0) #define genradix_last_pos(_radix) \ - (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1) + (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1) /** * genradix_for_each_reverse - iterate over entry in a genradix, reverse order diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h deleted file mode 100644 index c285968e437a..000000000000 --- a/include/linux/genetlink.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_GENERIC_NETLINK_H -#define __LINUX_GENERIC_NETLINK_H - -#include <uapi/linux/genetlink.h> - - -/* All generic netlink requests are serialized by a global lock. */ -extern void genl_lock(void); -extern void genl_unlock(void); - -/* for synchronisation between af_netlink and genetlink */ -extern atomic_t genl_sk_destructing_cnt; -extern wait_queue_head_t genl_sk_destructing_waitq; - -#define MODULE_ALIAS_GENL_FAMILY(family)\ - MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family) - -#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h index a419d93789ff..621b87a87d74 100644 --- a/include/linux/genl_magic_struct.h +++ b/include/linux/genl_magic_struct.h @@ -15,8 +15,8 @@ #endif #include <linux/args.h> -#include <linux/genetlink.h> #include <linux/types.h> +#include <net/genetlink.h> extern int CONCATENATE(GENL_MAGIC_FAMILY, _genl_register)(void); extern void CONCATENATE(GENL_MAGIC_FAMILY, _genl_unregister)(void); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e2a916cf29c4..7f9691d375f0 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -6,6 +6,8 @@ #include <linux/mmzone.h> #include <linux/topology.h> +#include <linux/alloc_tag.h> +#include <linux/sched.h> struct vm_area_struct; struct mempolicy; @@ -155,6 +157,31 @@ static inline int gfp_zonelist(gfp_t flags) } /* + * gfp flag masking for nested internal allocations. + * + * For code that needs to do allocations inside the public allocation API (e.g. + * memory allocation tracking code) the allocations need to obey the caller + * allocation context constraints to prevent allocation context mismatches (e.g. + * GFP_KERNEL allocations in GFP_NOFS contexts) from causing potential deadlock + * situations. + * + * It is also assumed that these nested allocations are for internal kernel + * object storage purposes only and are not going to be used for DMA, etc. Hence + * we strip out all the zone information and leave just the context information + * intact. + * + * Further, internal allocations must fail before the higher level allocation + * can fail, so we must make them fail faster and fail silently. We also don't + * want them to deplete emergency reserves. Hence nested allocations must be + * prepared for these allocations to fail. + */ +static inline gfp_t gfp_nested_mask(gfp_t flags) +{ + return ((flags & (GFP_KERNEL | GFP_ATOMIC | __GFP_NOLOCKDEP)) | + (__GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)); +} + +/* * We get the zone list from the current node and the gfp_mask. * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones.
* There are two zonelists per node, one for all zones with memory and @@ -175,42 +202,46 @@ static inline void arch_free_page(struct page *page, int order) { } static inline void arch_alloc_page(struct page *page, int order) { } #endif -struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, +struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); -struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, +#define __alloc_pages(...) alloc_hooks(__alloc_pages_noprof(__VA_ARGS__)) + +struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); +#define __folio_alloc(...) alloc_hooks(__folio_alloc_noprof(__VA_ARGS__)) -unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, +unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct list_head *page_list, struct page **page_array); +#define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) -unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp, +unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); +#define alloc_pages_bulk_array_mempolicy(...) \ + alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__)) /* Bulk allocate order-0 pages */ -static inline unsigned long -alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list) -{ - return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, list, NULL); -} +#define alloc_pages_bulk_list(_gfp, _nr_pages, _list) \ + __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL) -static inline unsigned long -alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_array) -{ - return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array); -} +#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array) \ + __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array) static inline unsigned long -alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) +alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, + struct page **page_array) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); + return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array); } +#define alloc_pages_bulk_array_node(...) \ + alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__)) + static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); @@ -230,96 +261,126 @@ static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) * online. For more general interface, see alloc_pages_node(). */ static inline struct page * -__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) +__alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp_mask); - return __alloc_pages(gfp_mask, order, nid, NULL); + return __alloc_pages_noprof(gfp_mask, order, nid, NULL); } +#define __alloc_pages_node(...) 
alloc_hooks(__alloc_pages_node_noprof(__VA_ARGS__)) + static inline -struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) +struct folio *__folio_alloc_node_noprof(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp); - return __folio_alloc(gfp, order, nid, NULL); + return __folio_alloc_noprof(gfp, order, nid, NULL); } +#define __folio_alloc_node(...) alloc_hooks(__folio_alloc_node_noprof(__VA_ARGS__)) + /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and * online. */ -static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, - unsigned int order) +static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask, + unsigned int order) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return __alloc_pages_node(nid, gfp_mask, order); + return __alloc_pages_node_noprof(nid, gfp_mask, order); } +#define alloc_pages_node(...) alloc_hooks(alloc_pages_node_noprof(__VA_ARGS__)) + #ifdef CONFIG_NUMA -struct page *alloc_pages(gfp_t gfp, unsigned int order); -struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, +struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order); +struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); -struct folio *folio_alloc(gfp_t gfp, unsigned int order); -struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma, +struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); +struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else -static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) +static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { - return alloc_pages_node(numa_node_id(), gfp_mask, order); + return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order); } -static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order, +static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) { - return alloc_pages(gfp, order); + return alloc_pages_noprof(gfp, order); } -static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order) +static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } -#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \ - folio_alloc(gfp, order) +#define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \ + folio_alloc_noprof(gfp, order) #endif + +#define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) +#define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__)) +#define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) +#define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) + #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -static inline struct page *alloc_page_vma(gfp_t gfp, + +static inline struct page *alloc_page_vma_noprof(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { - struct folio *folio = vma_alloc_folio(gfp, 0, vma, addr, false); + struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr, false); return &folio->page; } +#define alloc_page_vma(...) 
alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) + +extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); +#define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) -extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); -extern unsigned long get_zeroed_page(gfp_t gfp_mask); +extern unsigned long get_zeroed_page_noprof(gfp_t gfp_mask); +#define get_zeroed_page(...) alloc_hooks(get_zeroed_page_noprof(__VA_ARGS__)) + +void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) __alloc_size(1); +#define alloc_pages_exact(...) alloc_hooks(alloc_pages_exact_noprof(__VA_ARGS__)) -void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1); void free_pages_exact(void *virt, size_t size); -__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); -#define __get_free_page(gfp_mask) \ - __get_free_pages((gfp_mask), 0) +__meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); +#define alloc_pages_exact_nid(...) \ + alloc_hooks(alloc_pages_exact_nid_noprof(__VA_ARGS__)) + +#define __get_free_page(gfp_mask) \ + __get_free_pages((gfp_mask), 0) -#define __get_dma_pages(gfp_mask, order) \ - __get_free_pages((gfp_mask) | GFP_DMA, (order)) +#define __get_dma_pages(gfp_mask, order) \ + __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; +void page_frag_cache_drain(struct page_frag_cache *nc); extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask); +void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask, unsigned int align_mask); + +static inline void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); +} static inline void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask) { - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); } extern void page_frag_free(void *addr); @@ -334,7 +395,7 @@ void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); -void setup_pcp_cacheinfo(void); +void setup_pcp_cacheinfo(unsigned int cpu); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what @@ -366,10 +427,14 @@ extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC /* The below functions must be run on a range from a single zone. */ -extern int alloc_contig_range(unsigned long start, unsigned long end, +extern int alloc_contig_range_noprof(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask); -extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, - int nid, nodemask_t *nodemask); +#define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__)) + +extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, + int nid, nodemask_t *nodemask); +#define alloc_contig_pages(...) 
alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__)) + #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index 1b6053da8754..313be4ad79fd 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -2,6 +2,8 @@ #ifndef __LINUX_GFP_TYPES_H #define __LINUX_GFP_TYPES_H +#include <linux/bits.h> + /* The typedef is in types.h but we want the documentation here */ #if 0 /** @@ -21,44 +23,86 @@ typedef unsigned int __bitwise gfp_t; * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c */ +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif +#ifdef CONFIG_SLAB_OBJ_EXT + ___GFP_NO_OBJ_EXT_BIT, +#endif + ___GFP_LAST_BIT +}; + /* Plain integer GFP bitmasks. Do not use this directly. */ -#define ___GFP_DMA 0x01u -#define ___GFP_HIGHMEM 0x02u -#define ___GFP_DMA32 0x04u -#define ___GFP_MOVABLE 0x08u -#define ___GFP_RECLAIMABLE 0x10u -#define ___GFP_HIGH 0x20u -#define ___GFP_IO 0x40u -#define ___GFP_FS 0x80u -#define ___GFP_ZERO 0x100u +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) /* 0x200u unused */ -#define ___GFP_DIRECT_RECLAIM 0x400u -#define ___GFP_KSWAPD_RECLAIM 0x800u -#define ___GFP_WRITE 0x1000u -#define ___GFP_NOWARN 0x2000u -#define ___GFP_RETRY_MAYFAIL 0x4000u -#define ___GFP_NOFAIL 0x8000u -#define ___GFP_NORETRY 0x10000u -#define ___GFP_MEMALLOC 0x20000u -#define ___GFP_COMP 0x40000u -#define ___GFP_NOMEMALLOC 0x80000u -#define ___GFP_HARDWALL 0x100000u -#define ___GFP_THISNODE 0x200000u -#define ___GFP_ACCOUNT 0x400000u -#define ___GFP_ZEROTAGS 0x800000u +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) #ifdef CONFIG_KASAN_HW_TAGS -#define ___GFP_SKIP_ZERO 0x1000000u -#define ___GFP_SKIP_KASAN 0x2000000u +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) #else #define ___GFP_SKIP_ZERO 0 #define 
___GFP_SKIP_KASAN 0 #endif #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) #else #define ___GFP_NOLOCKDEP 0 #endif -/* If the above are modified, __GFP_BITS_SHIFT may need updating */ +#ifdef CONFIG_SLAB_OBJ_EXT +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) +#else +#define ___GFP_NO_OBJ_EXT 0 +#endif /* * Physical address zone modifiers (see linux/mmzone.h - low four bits) @@ -99,12 +143,15 @@ typedef unsigned int __bitwise gfp_t; * node with no fallbacks or placement policy enforcements. * * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) #define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) /** * DOC: Watermark modifiers @@ -249,7 +296,7 @@ typedef unsigned int __bitwise gfp_t; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 7ecc25c543ce..56ac7e7a2889 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -74,6 +74,12 @@ static inline bool gpio_is_valid(int number) * Until they are all fixed, leave 0-512 space for them. */ #define GPIO_DYNAMIC_BASE 512 +/* + * Define the maximum of the possible GPIO numbers in the global numberspace. + * While the GPIO base and numbers are positive, we limit it to the signed + * maximum, as a lot of code uses negative values for special cases. + */ +#define GPIO_DYNAMIC_MAX INT_MAX /* Always use the library code for GPIO management calls, * or when sleeping may be involved. @@ -114,8 +120,6 @@ static inline int gpio_to_irq(unsigned gpio) } int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); -int gpio_request_array(const struct gpio *array, size_t num); -void gpio_free_array(const struct gpio *array, size_t num); /* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */ @@ -146,11 +150,6 @@ static inline int gpio_request_one(unsigned gpio, return -ENOSYS; } -static inline int gpio_request_array(const struct gpio *array, size_t num) -{ - return -ENOSYS; -} - static inline void gpio_free(unsigned gpio) { might_sleep(); @@ -159,14 +158,6 @@ static inline void gpio_free(unsigned gpio) WARN_ON(1); } -static inline void gpio_free_array(const struct gpio *array, size_t num) -{ - might_sleep(); - - /* GPIO can never have been requested */ - WARN_ON(1); -} - static inline int gpio_direction_input(unsigned gpio) { return -ENOSYS; } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 7f75c9a51874..0032bb6e7d8f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -335,10 +335,12 @@ struct gpio_irq_chip { * (same as GPIO_LINE_DIRECTION_OUT / GPIO_LINE_DIRECTION_IN), * or negative error. It is recommended to always implement this * function, even on input-only or output-only gpio chips. - * @direction_input: configures signal "offset" as input, or returns error - This can be omitted on input-only or output-only gpio chips.
- * @direction_output: configures signal "offset" as output, or returns error - * This can be omitted on input-only or output-only gpio chips. + * @direction_input: configures signal "offset" as input, returns 0 on success + * or a negative error number. This can be omitted on input-only or + * output-only gpio chips. + * @direction_output: configures signal "offset" as output, returns 0 on + * success or a negative error number. This can be omitted on input-only + * or output-only gpio chips. * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error @@ -374,9 +376,7 @@ struct gpio_irq_chip { * @names: if set, must be an array of strings to use as alternative * names for the GPIOs in this chip. Any entry in the array * may be NULL if there is no alias for the GPIO, however the - * array must be @ngpio entries long. A name can include a single printk - * format specifier for an unsigned int. It is substituted by the actual - * number of the gpio. + * array must be @ngpio entries long. * @can_sleep: flag must be set iff get()/set() methods sleep, as they * must while accessing GPIO expander chips over I2C or SPI. This * implies that if the chip supports IRQs, these IRQs need to be threaded @@ -550,6 +550,21 @@ DEFINE_CLASS(_gpiochip_for_each_data, const char **label, int *i) /** + * for_each_hwgpio - Iterates over all GPIOs for given chip. + * @_chip: Chip to iterate over. + * @_i: Loop counter. + * @_label: Place to store the address of the label if the GPIO is requested. + * Set to NULL for unused GPIOs. + */ +#define for_each_hwgpio(_chip, _i, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + *_data.i < _chip->ngpio; \ + (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ + if (IS_ERR(*_data.label = \ + gpiochip_dup_line_label(_chip, *_data.i))) {} \ + else + +/** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range * @_chip: the chip to query * @_i: loop variable @@ -626,10 +641,9 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key); -struct gpio_device *gpio_device_find(void *data, - int (*match)(struct gpio_chip *gc, void *data)); -struct gpio_device *gpio_device_find_by_label(const char *label); -struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); +struct gpio_device *gpio_device_find(const void *data, + int (*match)(struct gpio_chip *gc, + const void *data)); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); @@ -704,18 +718,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) -int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hwirq); -void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); - -int gpiochip_irq_domain_activate(struct irq_domain *domain, - struct irq_data *data, bool reserve); -void gpiochip_irq_domain_deactivate(struct irq_domain *domain, - struct irq_data *data); - -bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, - unsigned int offset); - #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain); @@ -808,6 +810,9 @@ struct gpio_device 
*gpiod_to_gpio_device(struct gpio_desc *desc); int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + #else /* CONFIG_GPIOLIB */ #include <asm/bug.h> @@ -837,6 +842,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev) return NULL; } +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h new file mode 100644 index 000000000000..b5a84864650d --- /dev/null +++ b/include/linux/gpio/gpio-nomadik.h @@ -0,0 +1,294 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GPIO_NOMADIK_H +#define __LINUX_GPIO_NOMADIK_H + +struct fwnode_handle; + +/* Package definitions */ +#define PINCTRL_NMK_STN8815 0 +#define PINCTRL_NMK_DB8500 1 + +#define GPIO_BLOCK_SHIFT 5 +#define NMK_GPIO_PER_CHIP BIT(GPIO_BLOCK_SHIFT) +#define NMK_MAX_BANKS DIV_ROUND_UP(512, NMK_GPIO_PER_CHIP) + +/* Register in the logic block */ +#define NMK_GPIO_DAT 0x00 +#define NMK_GPIO_DATS 0x04 +#define NMK_GPIO_DATC 0x08 +#define NMK_GPIO_PDIS 0x0c +#define NMK_GPIO_DIR 0x10 +#define NMK_GPIO_DIRS 0x14 +#define NMK_GPIO_DIRC 0x18 +#define NMK_GPIO_SLPC 0x1c +#define NMK_GPIO_AFSLA 0x20 +#define NMK_GPIO_AFSLB 0x24 +#define NMK_GPIO_LOWEMI 0x28 + +#define NMK_GPIO_RIMSC 0x40 +#define NMK_GPIO_FIMSC 0x44 +#define NMK_GPIO_IS 0x48 +#define NMK_GPIO_IC 0x4c +#define NMK_GPIO_RWIMSC 0x50 +#define NMK_GPIO_FWIMSC 0x54 +#define NMK_GPIO_WKS 0x58 +/* These appear in DB8540 and later ASICs */ +#define NMK_GPIO_EDGELEVEL 0x5C +#define NMK_GPIO_LEVEL 0x60 + +/* Pull up/down values */ +enum nmk_gpio_pull { + NMK_GPIO_PULL_NONE, + NMK_GPIO_PULL_UP, + NMK_GPIO_PULL_DOWN, +}; + +/* Sleep mode */ +enum nmk_gpio_slpm { + NMK_GPIO_SLPM_INPUT, + NMK_GPIO_SLPM_WAKEUP_ENABLE = NMK_GPIO_SLPM_INPUT, + NMK_GPIO_SLPM_NOCHANGE, + NMK_GPIO_SLPM_WAKEUP_DISABLE = NMK_GPIO_SLPM_NOCHANGE, +}; + +struct nmk_gpio_chip { + struct gpio_chip chip; + void __iomem *addr; + struct clk *clk; + unsigned int bank; + void (*set_ioforce)(bool enable); + spinlock_t lock; + bool sleepmode; + bool is_mobileye_soc; + /* Keep track of configured edges */ + u32 edge_rising; + u32 edge_falling; + u32 real_wake; + u32 rwimsc; + u32 fwimsc; + u32 rimsc; + u32 fimsc; + u32 pull_up; + u32 lowemi; +}; + +/* Alternate functions: function C is set in hw by setting both A and B */ +#define NMK_GPIO_ALT_GPIO 0 +#define NMK_GPIO_ALT_A 1 +#define NMK_GPIO_ALT_B 2 +#define NMK_GPIO_ALT_C (NMK_GPIO_ALT_A | NMK_GPIO_ALT_B) + +#define NMK_GPIO_ALT_CX_SHIFT 2 +#define NMK_GPIO_ALT_C1 ((1<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C2 ((2<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C3 ((3<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) +#define NMK_GPIO_ALT_C4 ((4<<NMK_GPIO_ALT_CX_SHIFT) | NMK_GPIO_ALT_C) + +#define PRCM_GPIOCR_ALTCX(pin_num,\ + altc1_used, altc1_ri, altc1_cb,\ + altc2_used, altc2_ri, altc2_cb,\ + altc3_used, altc3_ri, altc3_cb,\ + altc4_used, altc4_ri, altc4_cb)\ +{\ + .pin = pin_num,\ + .altcx[PRCM_IDX_GPIOCR_ALTC1] = {\ + .used = altc1_used,\ + 
.reg_index = altc1_ri,\ + .control_bit = altc1_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC2] = {\ + .used = altc2_used,\ + .reg_index = altc2_ri,\ + .control_bit = altc2_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC3] = {\ + .used = altc3_used,\ + .reg_index = altc3_ri,\ + .control_bit = altc3_cb\ + },\ + .altcx[PRCM_IDX_GPIOCR_ALTC4] = {\ + .used = altc4_used,\ + .reg_index = altc4_ri,\ + .control_bit = altc4_cb\ + },\ +} + +/** + * enum prcm_gpiocr_reg_index + * Used to reference a PRCM GPIOCR register address. + */ +enum prcm_gpiocr_reg_index { + PRCM_IDX_GPIOCR1, + PRCM_IDX_GPIOCR2, + PRCM_IDX_GPIOCR3 +}; +/** + * enum prcm_gpiocr_altcx_index + * Used to reference an Other alternate-C function. + */ +enum prcm_gpiocr_altcx_index { + PRCM_IDX_GPIOCR_ALTC1, + PRCM_IDX_GPIOCR_ALTC2, + PRCM_IDX_GPIOCR_ALTC3, + PRCM_IDX_GPIOCR_ALTC4, + PRCM_IDX_GPIOCR_ALTC_MAX, +}; + +/** + * struct prcm_gpiocr_altcx - Other alternate-C function + * @used: other alternate-C function availability + * @reg_index: PRCM GPIOCR register index used to control the function + * @control_bit: PRCM GPIOCR bit used to control the function + */ +struct prcm_gpiocr_altcx { + bool used:1; + u8 reg_index:2; + u8 control_bit:5; +} __packed; + +/** + * struct prcm_gpiocr_altcx_pin_desc - Other alternate-C pin + * @pin: The pin number + * @altcx: array of other alternate-C[1-4] functions + */ +struct prcm_gpiocr_altcx_pin_desc { + unsigned short pin; + struct prcm_gpiocr_altcx altcx[PRCM_IDX_GPIOCR_ALTC_MAX]; +}; + +/** + * struct nmk_function - Nomadik pinctrl mux function + * @name: The name of the function, exported to pinctrl core. + * @groups: An array of pin groups that may select this function. + * @ngroups: The number of entries in @groups. + */ +struct nmk_function { + const char *name; + const char * const *groups; + unsigned int ngroups; +}; + +/** + * struct nmk_pingroup - describes a Nomadik pin group + * @grp: Generic data of the pin group (name and pins) + * @altsetting: the altsetting to apply to all pins in this group to + * configure them to be used by a function + */ +struct nmk_pingroup { + struct pingroup grp; + int altsetting; +}; + +#define NMK_PIN_GROUP(a, b) \ + { \ + .grp = PINCTRL_PINGROUP(#a, a##_pins, ARRAY_SIZE(a##_pins)), \ + .altsetting = b, \ + } + +/** + * struct nmk_pinctrl_soc_data - Nomadik pin controller per-SoC configuration + * @pins: An array describing all pins the pin controller affects. + * All pins which are also GPIOs must be listed first within the + * array, and be numbered identically to the GPIO controller's + * numbering. + * @npins: The number of entries in @pins. + * @functions: The functions supported on this SoC. + * @nfunctions: The number of entries in @functions. + * @groups: An array describing all pin groups the SoC supports. + * @ngroups: The number of entries in @groups.
+ * @altcx_pins: The pins that support Other alternate-C function on this SoC + * @npins_altcx: The number of Other alternate-C pins + * @prcm_gpiocr_registers: The array of PRCM GPIOCR registers on this SoC + */ +struct nmk_pinctrl_soc_data { + const struct pinctrl_pin_desc *pins; + unsigned int npins; + const struct nmk_function *functions; + unsigned int nfunctions; + const struct nmk_pingroup *groups; + unsigned int ngroups; + const struct prcm_gpiocr_altcx_pin_desc *altcx_pins; + unsigned int npins_altcx; + const u16 *prcm_gpiocr_registers; +}; + +#ifdef CONFIG_PINCTRL_STN8815 + +void nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_stn8815_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +#ifdef CONFIG_PINCTRL_DB8500 + +void nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_db8500_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +#ifdef CONFIG_PINCTRL_DB8540 + +void nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc); + +#else + +static inline void +nmk_pinctrl_db8540_init(const struct nmk_pinctrl_soc_data **soc) +{ +} + +#endif + +struct platform_device; + +#ifdef CONFIG_DEBUG_FS + +/* + * Symbols declared in gpio-nomadik used by pinctrl-nomadik. If pinctrl-nomadik + * is enabled, then gpio-nomadik is enabled as well; the reverse is not always + * true. + */ +void nmk_gpio_dbg_show_one(struct seq_file *s, struct pinctrl_dev *pctldev, + struct gpio_chip *chip, unsigned int offset, + unsigned int gpio); + +#else + +static inline void nmk_gpio_dbg_show_one(struct seq_file *s, + struct pinctrl_dev *pctldev, + struct gpio_chip *chip, + unsigned int offset, + unsigned int gpio) +{ +} + +#endif + +void __nmk_gpio_make_output(struct nmk_gpio_chip *nmk_chip, + unsigned int offset, int val); +void __nmk_gpio_set_slpm(struct nmk_gpio_chip *nmk_chip, unsigned int offset, + enum nmk_gpio_slpm mode); +struct nmk_gpio_chip *nmk_gpio_populate_chip(struct fwnode_handle *fwnode, + struct platform_device *pdev); + +/* Symbols declared in pinctrl-nomadik used by gpio-nomadik.
*/ +#ifdef CONFIG_PINCTRL_NOMADIK +extern struct nmk_gpio_chip *nmk_gpio_chips[NMK_MAX_BANKS]; +extern spinlock_t nmk_gpio_slpm_lock; +int __maybe_unused nmk_prcm_gpiocr_get_mode(struct pinctrl_dev *pctldev, + int gpio); +#endif + +#endif /* __LINUX_GPIO_NOMADIK_H */ diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h index 6c75c8bd44a0..0d2209308002 100644 --- a/include/linux/gpio/property.h +++ b/include/linux/gpio/property.h @@ -2,10 +2,13 @@ #ifndef __LINUX_GPIO_PROPERTY_H #define __LINUX_GPIO_PROPERTY_H -#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */ #include <linux/property.h> +struct software_node; + #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ PROPERTY_ENTRY_REF(_name_, _chip_node_, _idx_, _flags_) +extern const struct software_node swnode_gpio_undefined; + #endif /* __LINUX_GPIO_PROPERTY_H */ diff --git a/include/linux/greybus.h b/include/linux/greybus.h index 18c0fb958b74..634c9511cf78 100644 --- a/include/linux/greybus.h +++ b/include/linux/greybus.h @@ -104,44 +104,14 @@ void gb_debugfs_init(void); void gb_debugfs_cleanup(void); struct dentry *gb_debugfs_get(void); -extern struct bus_type greybus_bus_type; - -extern struct device_type greybus_hd_type; -extern struct device_type greybus_module_type; -extern struct device_type greybus_interface_type; -extern struct device_type greybus_control_type; -extern struct device_type greybus_bundle_type; -extern struct device_type greybus_svc_type; - -static inline int is_gb_host_device(const struct device *dev) -{ - return dev->type == &greybus_hd_type; -} - -static inline int is_gb_module(const struct device *dev) -{ - return dev->type == &greybus_module_type; -} - -static inline int is_gb_interface(const struct device *dev) -{ - return dev->type == &greybus_interface_type; -} - -static inline int is_gb_control(const struct device *dev) -{ - return dev->type == &greybus_control_type; -} - -static inline int is_gb_bundle(const struct device *dev) -{ - return dev->type == &greybus_bundle_type; -} - -static inline int is_gb_svc(const struct device *dev) -{ - return dev->type == &greybus_svc_type; -} +extern const struct bus_type greybus_bus_type; + +extern const struct device_type greybus_hd_type; +extern const struct device_type greybus_module_type; +extern const struct device_type greybus_interface_type; +extern const struct device_type greybus_control_type; +extern const struct device_type greybus_bundle_type; +extern const struct device_type greybus_svc_type; static inline bool cport_id_valid(struct gb_host_device *hd, u16 cport_id) { diff --git a/include/linux/greybus/greybus_protocols.h b/include/linux/greybus/greybus_protocols.h index aeb8f9243545..820134b0105c 100644 --- a/include/linux/greybus/greybus_protocols.h +++ b/include/linux/greybus/greybus_protocols.h @@ -232,9 +232,7 @@ struct gb_fw_download_fetch_firmware_request { __le32 size; } __packed; -struct gb_fw_download_fetch_firmware_response { - __u8 data[0]; -} __packed; +/* gb_fw_download_fetch_firmware_response contains no other data */ /* firmware download release firmware request */ struct gb_fw_download_release_firmware_request { @@ -414,9 +412,7 @@ struct gb_bootrom_get_firmware_request { __le32 size; } __packed; -struct gb_bootrom_get_firmware_response { - __u8 data[0]; -} __packed; +/* gb_bootrom_get_firmware_response contains no other data */ /* Bootrom protocol Ready to boot request */ struct gb_bootrom_ready_to_boot_request { diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 
13b1e65fbdcc..6730ee900ee1 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -21,6 +21,10 @@ #define HID_USAGE_SENSOR_ALS 0x200041 #define HID_USAGE_SENSOR_DATA_LIGHT 0x2004d0 #define HID_USAGE_SENSOR_LIGHT_ILLUM 0x2004d1 +#define HID_USAGE_SENSOR_LIGHT_COLOR_TEMPERATURE 0x2004d2 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY 0x2004d3 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_X 0x2004d4 +#define HID_USAGE_SENSOR_LIGHT_CHROMATICITY_Y 0x2004d5 /* PROX (200011) */ #define HID_USAGE_SENSOR_PROX 0x200011 diff --git a/include/linux/hid.h b/include/linux/hid.h index 7c26db874ff0..8e06d89698e6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -474,9 +474,9 @@ struct hid_usage { __s8 wheel_factor; /* 120/resolution_multiplier */ __u16 code; /* input driver code */ __u8 type; /* input driver type */ - __s8 hat_min; /* hat switch fun */ - __s8 hat_max; /* ditto */ - __s8 hat_dir; /* ditto */ + __s16 hat_min; /* hat switch fun */ + __s16 hat_max; /* ditto */ + __s16 hat_dir; /* ditto */ __s16 wheel_accumulated; /* hi-res wheel */ }; @@ -683,9 +683,9 @@ struct hid_device { /* device report descriptor */ unsigned int id; /* system unique id */ -#ifdef CONFIG_BPF +#ifdef CONFIG_HID_BPF struct hid_bpf bpf; /* hid-bpf data */ -#endif /* CONFIG_BPF */ +#endif /* CONFIG_HID_BPF */ }; void hiddev_free(struct kref *ref); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 7118ac28d468..eec2592dec12 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -103,6 +103,9 @@ struct hid_bpf_ops { unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype); + int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len); + int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, + u8 *data, u32 size, int interrupt); struct module *owner; const struct bus_type *bus_type; }; @@ -149,10 +152,8 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline void hid_bpf_device_init(struct hid_device *hid) {} -static inline u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size) -{ - return kmemdup(rdesc, *size, GFP_KERNEL); -} +#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ + ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) #endif /* CONFIG_HID_BPF */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 451c1dff0e87..00341b56d291 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -439,6 +439,13 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * memcpy_from_folio - Copy a range of bytes from a folio. + * @to: The memory to copy to. + * @folio: The folio to read from. + * @offset: The first byte in the folio to read. + * @len: The number of bytes to copy. + */ static inline void memcpy_from_folio(char *to, struct folio *folio, size_t offset, size_t len) { @@ -460,6 +467,13 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, } while (len > 0); } +/** + * memcpy_to_folio - Copy a range of bytes to a folio. + * @folio: The folio to write to. + * @offset: The first byte in the folio to store to. + * @from: The memory to copy from. + * @len: The number of bytes to copy. 
+ */ static inline void memcpy_to_folio(struct folio *folio, size_t offset, const char *from, size_t len) { diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 5f4c74facf6a..9d7754ad5e9b 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -43,6 +43,7 @@ #define QM_MB_CMD_CQC_BT 0x5 #define QM_MB_CMD_SQC_VFT_V2 0x6 #define QM_MB_CMD_STOP_QP 0x8 +#define QM_MB_CMD_FLUSH_QM 0x9 #define QM_MB_CMD_SRC 0xc #define QM_MB_CMD_DST 0xd @@ -151,6 +152,7 @@ enum qm_cap_bits { QM_SUPPORT_DB_ISOLATION = 0x0, QM_SUPPORT_FUNC_QOS, QM_SUPPORT_STOP_QP, + QM_SUPPORT_STOP_FUNC, QM_SUPPORT_MB_COMMAND, QM_SUPPORT_SVA_PREFETCH, QM_SUPPORT_RPM, @@ -161,6 +163,11 @@ struct qm_dev_alg { const char *alg; }; +struct qm_dev_dfx { + u32 dev_state; + u32 dev_timeout; +}; + struct dfx_diff_registers { u32 *regs; u32 reg_offset; @@ -189,6 +196,7 @@ struct qm_debug { struct dentry *debug_root; struct dentry *qm_d; struct debugfs_file files[DEBUG_FILE_NUM]; + struct qm_dev_dfx dev_dfx; unsigned int *qm_last_words; /* ACC engines recording last regs */ unsigned int *last_words; @@ -523,7 +531,7 @@ void hisi_qm_uninit(struct hisi_qm *qm); int hisi_qm_start(struct hisi_qm *qm); int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r); int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg); -int hisi_qm_stop_qp(struct hisi_qp *qp); +void hisi_qm_stop_qp(struct hisi_qp *qp); int hisi_qp_send(struct hisi_qp *qp, const void *msg); void hisi_qm_debug_init(struct hisi_qm *qm); void hisi_qm_debug_regs_clear(struct hisi_qm *qm); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 641c4567cfa7..aa1e65ccb615 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -18,12 +18,8 @@ #include <linux/list.h> #include <linux/percpu-defs.h> #include <linux/rbtree.h> -#include <linux/seqlock.h> #include <linux/timer.h> -struct hrtimer_clock_base; -struct hrtimer_cpu_base; - /* * Mode arguments of xxx_hrtimer functions: * @@ -98,107 +94,6 @@ struct hrtimer_sleeper { struct task_struct *task; }; -#ifdef CONFIG_64BIT -# define __hrtimer_clock_base_align ____cacheline_aligned -#else -# define __hrtimer_clock_base_align -#endif - -/** - * struct hrtimer_clock_base - the timer base for a specific clock - * @cpu_base: per cpu clock base - * @index: clock type index for per_cpu support when moving a - * timer to a base on another cpu.
- * @clockid: clock id for per_cpu support - * @seq: seqcount around __run_hrtimer - * @running: pointer to the currently running hrtimer - * @active: red black tree root node for the active timers - * @get_time: function to retrieve the current time of the clock - * @offset: offset of this clock to the monotonic base - */ -struct hrtimer_clock_base { - struct hrtimer_cpu_base *cpu_base; - unsigned int index; - clockid_t clockid; - seqcount_raw_spinlock_t seq; - struct hrtimer *running; - struct timerqueue_head active; - ktime_t (*get_time)(void); - ktime_t offset; -} __hrtimer_clock_base_align; - -enum hrtimer_base_type { - HRTIMER_BASE_MONOTONIC, - HRTIMER_BASE_REALTIME, - HRTIMER_BASE_BOOTTIME, - HRTIMER_BASE_TAI, - HRTIMER_BASE_MONOTONIC_SOFT, - HRTIMER_BASE_REALTIME_SOFT, - HRTIMER_BASE_BOOTTIME_SOFT, - HRTIMER_BASE_TAI_SOFT, - HRTIMER_MAX_CLOCK_BASES, -}; - -/** - * struct hrtimer_cpu_base - the per cpu clock bases - * @lock: lock protecting the base and associated clock bases - * and timers - * @cpu: cpu number - * @active_bases: Bitfield to mark bases with active timers - * @clock_was_set_seq: Sequence counter of clock was set events - * @hres_active: State of high resolution mode - * @in_hrtirq: hrtimer_interrupt() is currently executing - * @hang_detected: The last hrtimer interrupt detected a hang - * @softirq_activated: displays, if the softirq is raised - update of softirq - * related settings is not required then. - * @nr_events: Total number of hrtimer interrupt events - * @nr_retries: Total number of hrtimer interrupt retries - * @nr_hangs: Total number of hrtimer interrupt hangs - * @max_hang_time: Maximum time spent in hrtimer_interrupt - * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are - * expired - * @online: CPU is online from an hrtimers point of view - * @timer_waiters: A hrtimer_cancel() invocation waits for the timer - * callback to finish. - * @expires_next: absolute time of the next event, is required for remote - * hrtimer enqueue; it is the total first expiry time (hard - * and soft hrtimer are taken into account) - * @next_timer: Pointer to the first expiring timer - * @softirq_expires_next: Time to check, if soft queues needs also to be expired - * @softirq_next_timer: Pointer to the first expiring softirq based timer - * @clock_base: array of clock bases for this cpu - * - * Note: next_timer is just an optimization for __remove_hrtimer(). - * Do not dereference the pointer because it is not reliable on - * cross cpu removals. 
- */ -struct hrtimer_cpu_base { - raw_spinlock_t lock; - unsigned int cpu; - unsigned int active_bases; - unsigned int clock_was_set_seq; - unsigned int hres_active : 1, - in_hrtirq : 1, - hang_detected : 1, - softirq_activated : 1, - online : 1; -#ifdef CONFIG_HIGH_RES_TIMERS - unsigned int nr_events; - unsigned short nr_retries; - unsigned short nr_hangs; - unsigned int max_hang_time; -#endif -#ifdef CONFIG_PREEMPT_RT - spinlock_t softirq_expiry_lock; - atomic_t timer_waiters; -#endif - ktime_t expires_next; - struct hrtimer *next_timer; - ktime_t softirq_expires_next; - struct hrtimer *softirq_next_timer; - struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; -} ____cacheline_aligned; - static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->node.expires = time; @@ -447,20 +342,12 @@ extern u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval); /** - * hrtimer_forward_now - forward the timer expiry so it expires after now + * hrtimer_forward_now() - forward the timer expiry so it expires after now * @timer: hrtimer to forward * @interval: the interval to forward * - * Forward the timer expiry so it will expire after the current time - * of the hrtimer clock base. Returns the number of overruns. - * - * Can be safely called from the callback function of @timer. If - * called from other contexts @timer must neither be enqueued nor - * running the callback and the caller needs to take care of - * serialization. - * - * Note: This only updates the timer expiry value and does not requeue - * the timer. + * It is a variant of hrtimer_forward(). The timer will expire after the current + * time of the hrtimer clock base. See hrtimer_forward() for details. */ static inline u64 hrtimer_forward_now(struct hrtimer *timer, ktime_t interval) diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index 2d3e3c5fb946..c3b4b7ed7c16 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -3,6 +3,8 @@ #define _LINUX_HRTIMER_DEFS_H #include <linux/ktime.h> +#include <linux/timerqueue.h> +#include <linux/seqlock.h> #ifdef CONFIG_HIGH_RES_TIMERS @@ -24,4 +26,106 @@ #endif +#ifdef CONFIG_64BIT +# define __hrtimer_clock_base_align ____cacheline_aligned +#else +# define __hrtimer_clock_base_align +#endif + +/** + * struct hrtimer_clock_base - the timer base for a specific clock + * @cpu_base: per cpu clock base + * @index: clock type index for per_cpu support when moving a + * timer to a base on another cpu. 
+ * @clockid: clock id for per_cpu support + * @seq: seqcount around __run_hrtimer + * @running: pointer to the currently running hrtimer + * @active: red black tree root node for the active timers + * @get_time: function to retrieve the current time of the clock + * @offset: offset of this clock to the monotonic base + */ +struct hrtimer_clock_base { + struct hrtimer_cpu_base *cpu_base; + unsigned int index; + clockid_t clockid; + seqcount_raw_spinlock_t seq; + struct hrtimer *running; + struct timerqueue_head active; + ktime_t (*get_time)(void); + ktime_t offset; +} __hrtimer_clock_base_align; + +enum hrtimer_base_type { + HRTIMER_BASE_MONOTONIC, + HRTIMER_BASE_REALTIME, + HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_TAI, + HRTIMER_BASE_MONOTONIC_SOFT, + HRTIMER_BASE_REALTIME_SOFT, + HRTIMER_BASE_BOOTTIME_SOFT, + HRTIMER_BASE_TAI_SOFT, + HRTIMER_MAX_CLOCK_BASES, +}; + +/** + * struct hrtimer_cpu_base - the per cpu clock bases + * @lock: lock protecting the base and associated clock bases + * and timers + * @cpu: cpu number + * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set_seq: Sequence counter of clock was set events + * @hres_active: State of high resolution mode + * @in_hrtirq: hrtimer_interrupt() is currently executing + * @hang_detected: The last hrtimer interrupt detected a hang + * @softirq_activated: displays, if the softirq is raised - update of softirq + * related settings is not required then. + * @nr_events: Total number of hrtimer interrupt events + * @nr_retries: Total number of hrtimer interrupt retries + * @nr_hangs: Total number of hrtimer interrupt hangs + * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are + * expired + * @online: CPU is online from an hrtimers point of view + * @timer_waiters: A hrtimer_cancel() invocation waits for the timer + * callback to finish. + * @expires_next: absolute time of the next event, is required for remote + * hrtimer enqueue; it is the total first expiry time (hard + * and soft hrtimer are taken into account) + * @next_timer: Pointer to the first expiring timer + * @softirq_expires_next: Time to check, if soft queues needs also to be expired + * @softirq_next_timer: Pointer to the first expiring softirq based timer + * @clock_base: array of clock bases for this cpu + * + * Note: next_timer is just an optimization for __remove_hrtimer(). + * Do not dereference the pointer because it is not reliable on + * cross cpu removals. 
+ */ +struct hrtimer_cpu_base { + raw_spinlock_t lock; + unsigned int cpu; + unsigned int active_bases; + unsigned int clock_was_set_seq; + unsigned int hres_active : 1, + in_hrtirq : 1, + hang_detected : 1, + softirq_activated : 1, + online : 1; +#ifdef CONFIG_HIGH_RES_TIMERS + unsigned int nr_events; + unsigned short nr_retries; + unsigned short nr_hangs; + unsigned int max_hang_time; +#endif +#ifdef CONFIG_PREEMPT_RT + spinlock_t softirq_expiry_lock; + atomic_t timer_waiters; +#endif + ktime_t expires_next; + struct hrtimer *next_timer; + ktime_t softirq_expires_next; + struct hrtimer *softirq_next_timer; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; +} ____cacheline_aligned; + + #endif diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 5adb86af35fc..c8d3ec116e29 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -64,9 +64,6 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj, enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; -#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) -#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) - /* * Mask of all large folio orders supported for anonymous THP; all orders up to * and including PMD_ORDER, except order-0 (which is not "huge") and order-1 @@ -84,17 +81,32 @@ extern struct kobj_attribute shmem_enabled_attr; */ #define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) -#define thp_vma_allowable_order(vma, vm_flags, smaps, in_pf, enforce_sysfs, order) \ - (!!thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, enforce_sysfs, BIT(order))) +#define TVA_SMAPS (1 << 0) /* Will be used for procfs */ +#define TVA_IN_PF (1 << 1) /* Page fault handler */ +#define TVA_ENFORCE_SYSFS (1 << 2) /* Obey sysfs configuration */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \ + (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order))) + +#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES #define HPAGE_PMD_SHIFT PMD_SHIFT -#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) +#define HPAGE_PUD_SHIFT PUD_SHIFT +#else +#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; }) +#endif + +#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) +#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1)) +#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) -#define HPAGE_PUD_SHIFT PUD_SHIFT -#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) +#define HPAGE_PUD_ORDER (HPAGE_PUD_SHIFT-PAGE_SHIFT) +#define HPAGE_PUD_NR (1<<HPAGE_PUD_ORDER) #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) +#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern unsigned long transparent_hugepage_flags; extern unsigned long huge_anon_orders_always; @@ -210,17 +222,15 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma) } unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs, + unsigned long vm_flags, + unsigned long tva_flags, unsigned long orders); /** * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma * @vma: the vm area to check * @vm_flags: use these vm_flags instead of vma->vm_flags - * @smaps: whether answer will be used for smaps file - * @in_pf: whether answer will be used by page fault handler - * @enforce_sysfs: whether sysfs config should be taken into account + * @tva_flags: Which TVA flags to honour * 
@orders: bitfield of all orders to consider * * Calculates the intersection of the requested hugepage orders and the allowed @@ -233,12 +243,12 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma, */ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs, + unsigned long vm_flags, + unsigned long tva_flags, unsigned long orders) { /* Optimization to check if required orders are enabled early. */ - if (enforce_sysfs && vma_is_anonymous(vma)) { + if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) { unsigned long mask = READ_ONCE(huge_anon_orders_always); if (vm_flags & VM_HUGEPAGE) @@ -252,8 +262,30 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return 0; } - return __thp_vma_allowable_orders(vma, vm_flags, smaps, in_pf, - enforce_sysfs, orders); + return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); +} + +enum mthp_stat_item { + MTHP_STAT_ANON_FAULT_ALLOC, + MTHP_STAT_ANON_FAULT_FALLBACK, + MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, + MTHP_STAT_ANON_SWPOUT, + MTHP_STAT_ANON_SWPOUT_FALLBACK, + __MTHP_STAT_COUNT +}; + +struct mthp_stat { + unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT]; +}; + +DECLARE_PER_CPU(struct mthp_stat, mthp_stats); + +static inline void count_mthp_stat(int order, enum mthp_stat_item item) +{ + if (order <= 0 || order > PMD_ORDER) + return; + + this_cpu_inc(mthp_stats.stats[order][item]); } #define transparent_hugepage_use_zero_page() \ @@ -262,13 +294,16 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); +unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags, + vm_flags_t vm_flags); -void folio_prep_large_rmappable(struct folio *folio); bool can_split_folio(struct folio *folio, int *pextra_pins); -int split_huge_page_to_list(struct page *page, struct list_head *list); +int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order); static inline int split_huge_page(struct page *page) { - return split_huge_page_to_list(page, NULL); + return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio); @@ -343,17 +378,15 @@ static inline bool folio_test_pmd_mappable(struct folio *folio) struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap); -struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, - pud_t *pud, int flags, struct dev_pagemap **pgmap); vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf); -extern struct page *huge_zero_page; +extern struct folio *huge_zero_folio; extern unsigned long huge_zero_pfn; -static inline bool is_huge_zero_page(struct page *page) +static inline bool is_huge_zero_folio(const struct folio *folio) { - return READ_ONCE(huge_zero_page) == page; + return READ_ONCE(huge_zero_folio) == folio; } static inline bool is_huge_zero_pmd(pmd_t pmd) @@ -366,8 +399,8 @@ static inline bool is_huge_zero_pud(pud_t pud) return false; } -struct page *mm_get_huge_zero_page(struct mm_struct *mm); -void mm_put_huge_zero_page(struct mm_struct *mm); +struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); +void mm_put_huge_zero_folio(struct mm_struct *mm); 
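For illustration only — not a hunk from this patch: the huge_mm.h changes above replace the three boolean parameters of thp_vma_allowable_orders() (smaps, in_pf, enforce_sysfs) with a single tva_flags bitmask built from the new TVA_* defines. A hypothetical page-fault-path caller would be converted along these lines, with the caller itself being an invented example:

/* old interface: separate booleans for smaps, page fault and sysfs enforcement */
orders = thp_vma_allowable_orders(vma, vma->vm_flags, false, true, true, orders);

/* new interface: one bitmask built from the TVA_* flags defined above */
orders = thp_vma_allowable_orders(vma, vma->vm_flags,
				  TVA_IN_PF | TVA_ENFORCE_SYSFS, orders);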
#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) @@ -377,13 +410,6 @@ static inline bool thp_migration_supported(void) } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) -#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) -#define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; }) - -#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; }) -#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) -#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) static inline bool folio_test_pmd_mappable(struct folio *folio) { @@ -403,26 +429,33 @@ static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma, } static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, - unsigned long vm_flags, bool smaps, - bool in_pf, bool enforce_sysfs, + unsigned long vm_flags, + unsigned long tva_flags, unsigned long orders) { return 0; } -static inline void folio_prep_large_rmappable(struct folio *folio) {} - #define transparent_hugepage_flags 0UL #define thp_get_unmapped_area NULL +static inline unsigned long +thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags) +{ + return 0; +} + static inline bool can_split_folio(struct folio *folio, int *pextra_pins) { return false; } static inline int -split_huge_page_to_list(struct page *page, struct list_head *list) +split_huge_page_to_list_to_order(struct page *page, struct list_head *list, + unsigned int new_order) { return 0; } @@ -481,7 +514,7 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) return 0; } -static inline bool is_huge_zero_page(struct page *page) +static inline bool is_huge_zero_folio(const struct folio *folio) { return false; } @@ -496,7 +529,7 @@ static inline bool is_huge_zero_pud(pud_t pud) return false; } -static inline void mm_put_huge_zero_page(struct mm_struct *mm) +static inline void mm_put_huge_zero_folio(struct mm_struct *mm) { return; } @@ -507,39 +540,24 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, - unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap) -{ - return NULL; -} - static inline bool thp_migration_supported(void) { return false; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static inline int split_folio_to_list(struct folio *folio, - struct list_head *list) +static inline int split_folio_to_list_to_order(struct folio *folio, + struct list_head *list, int new_order) { - return split_huge_page_to_list(&folio->page, list); + return split_huge_page_to_list_to_order(&folio->page, list, new_order); } -static inline int split_folio(struct folio *folio) +static inline int split_folio_to_order(struct folio *folio, int new_order) { - return split_folio_to_list(folio, NULL); + return split_folio_to_list_to_order(folio, NULL, new_order); } -/* - * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to - * limitations in the implementation like arm64 MTE can override this to - * false - */ -#ifndef arch_thp_swp_supported -static inline bool arch_thp_swp_supported(void) -{ - return true; -} -#endif +#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0) +#define split_folio(f) split_folio_to_order(f, 0) #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c1ee640d87b1..2b3c3a404769 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -174,11 +174,14 @@ u32 
hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pud_t *pud); +bool hugetlbfs_pagecache_present(struct hstate *h, + struct vm_area_struct *vma, + unsigned long address); -struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); +struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); extern int sysctl_hugetlb_shm_group; -extern struct list_head huge_boot_pages; +extern struct list_head huge_boot_pages[MAX_NUMNODES]; /* arch callbacks */ @@ -272,13 +275,9 @@ void hugetlb_vma_unlock_write(struct vm_area_struct *vma); int hugetlb_vma_trylock_write(struct vm_area_struct *vma); void hugetlb_vma_assert_locked(struct vm_area_struct *vma); void hugetlb_vma_lock_release(struct kref *kref); - -int pmd_huge(pmd_t pmd); -int pud_huge(pud_t pud); long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); - bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); @@ -298,8 +297,8 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } -static inline struct address_space *hugetlb_page_mapping_lock_write( - struct page *hpage) +static inline struct address_space *hugetlb_folio_mapping_lock_write( + struct folio *folio) { return NULL; } @@ -329,13 +328,6 @@ static inline void hugetlb_zap_end( { } -static inline struct page *hugetlb_follow_page_mask( - struct vm_area_struct *vma, unsigned long address, unsigned int flags, - unsigned int *page_mask) -{ - BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ -} - static inline int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *dst_vma, @@ -399,16 +391,6 @@ static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) { } -static inline int pmd_huge(pmd_t pmd) -{ - return 0; -} - -static inline int pud_huge(pud_t pud) -{ - return 0; -} - static inline int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { @@ -493,16 +475,6 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } #endif /* !CONFIG_HUGETLB_PAGE */ -/* - * hugepages at page global directory. If arch support - * hugepages at pgd level, they need to define this. 
- */ -#ifndef pgd_huge -#define pgd_huge(x) 0 -#endif -#ifndef p4d_huge -#define p4d_huge(x) 0 -#endif #ifndef pgd_write static inline int pgd_write(pgd_t pgd) @@ -554,17 +526,13 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); } -extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, int creat_flags, int page_size_log); -static inline bool is_file_hugepages(struct file *file) +static inline bool is_file_hugepages(const struct file *file) { - if (file->f_op == &hugetlbfs_file_operations) - return true; - - return is_file_shm_hugepages(file); + return file->f_op->fop_flags & FOP_HUGE_PAGES; } static inline struct hstate *hstate_inode(struct inode *i) @@ -747,7 +715,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask, gfp_t gfp_mask); + nodemask_t *nmask, gfp_t gfp_mask, + bool allow_alloc_fallback); int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, @@ -859,9 +828,9 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, #define is_hugepage_only_range is_hugepage_only_range #endif -#ifndef arch_clear_hugepage_flags -static inline void arch_clear_hugepage_flags(struct page *page) { } -#define arch_clear_hugepage_flags arch_clear_hugepage_flags +#ifndef arch_clear_hugetlb_flags +static inline void arch_clear_hugetlb_flags(struct folio *folio) { } +#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags #endif #ifndef arch_make_huge_pte @@ -888,8 +857,8 @@ static inline int hstate_index(struct hstate *h) return h - hstates; } -extern int dissolve_free_huge_page(struct page *page); -extern int dissolve_free_huge_pages(unsigned long start_pfn, +int dissolve_free_hugetlb_folio(struct folio *folio); +int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn); #ifdef CONFIG_MEMORY_FAILURE @@ -970,6 +939,30 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) return modified_mask; } +static inline bool htlb_allow_alloc_fallback(int reason) +{ + bool allowed_fallback = false; + + /* + * Note: the memory offline, memory failure and migration syscalls will + * be allowed to fall back to other nodes due to lack of a better choice, + * which might break the per-node hugetlb pool. Other cases will + * set __GFP_THISNODE to avoid breaking the per-node hugetlb pool.
+ */ + switch (reason) { + case MR_MEMORY_HOTPLUG: + case MR_MEMORY_FAILURE: + case MR_SYSCALL: + case MR_MEMPOLICY_MBIND: + allowed_fallback = true; + break; + default: + break; + } + + return allowed_fallback; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -1065,7 +1058,8 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, static inline struct folio * alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, - nodemask_t *nmask, gfp_t gfp_mask) + nodemask_t *nmask, gfp_t gfp_mask, + bool allow_alloc_fallback) { return NULL; } @@ -1150,12 +1144,12 @@ static inline int hstate_index(struct hstate *h) return 0; } -static inline int dissolve_free_huge_page(struct page *page) +static inline int dissolve_free_hugetlb_folio(struct folio *folio) { return 0; } -static inline int dissolve_free_huge_pages(unsigned long start_pfn, +static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn, unsigned long end_pfn) { return 0; @@ -1181,6 +1175,11 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) return 0; } +static inline bool htlb_allow_alloc_fallback(int reason) +{ + return false; +} + static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { @@ -1221,6 +1220,12 @@ static inline void hugetlb_register_node(struct node *node) static inline void hugetlb_unregister_node(struct node *node) { } + +static inline bool hugetlbfs_pagecache_present( + struct hstate *h, struct vm_area_struct *vma, unsigned long address) +{ + return false; +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 8cd6a6b33593..edf96f249eb5 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -141,6 +141,7 @@ enum hwmon_in_attributes { hwmon_in_rated_min, hwmon_in_rated_max, hwmon_in_beep, + hwmon_in_fault, }; #define HWMON_I_ENABLE BIT(hwmon_in_enable) @@ -162,6 +163,7 @@ enum hwmon_in_attributes { #define HWMON_I_RATED_MIN BIT(hwmon_in_rated_min) #define HWMON_I_RATED_MAX BIT(hwmon_in_rated_max) #define HWMON_I_BEEP BIT(hwmon_in_beep) +#define HWMON_I_FAULT BIT(hwmon_in_fault) enum hwmon_curr_attributes { hwmon_curr_enable, @@ -293,6 +295,8 @@ enum hwmon_humidity_attributes { hwmon_humidity_fault, hwmon_humidity_rated_min, hwmon_humidity_rated_max, + hwmon_humidity_min_alarm, + hwmon_humidity_max_alarm, }; #define HWMON_H_ENABLE BIT(hwmon_humidity_enable) @@ -306,6 +310,8 @@ enum hwmon_humidity_attributes { #define HWMON_H_FAULT BIT(hwmon_humidity_fault) #define HWMON_H_RATED_MIN BIT(hwmon_humidity_rated_min) #define HWMON_H_RATED_MAX BIT(hwmon_humidity_rated_max) +#define HWMON_H_MIN_ALARM BIT(hwmon_humidity_min_alarm) +#define HWMON_H_MAX_ALARM BIT(hwmon_humidity_max_alarm) enum hwmon_fan_attributes { hwmon_fan_enable, @@ -425,12 +431,12 @@ struct hwmon_channel_info { const u32 *config; }; -#define HWMON_CHANNEL_INFO(stype, ...) \ - (&(struct hwmon_channel_info) { \ - .type = hwmon_##stype, \ - .config = (u32 []) { \ - __VA_ARGS__, 0 \ - } \ +#define HWMON_CHANNEL_INFO(stype, ...) 
\ + (&(const struct hwmon_channel_info) { \ + .type = hwmon_##stype, \ + .config = (const u32 []) { \ + __VA_ARGS__, 0 \ + } \ }) /** diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6ef0557b4bff..5e39baa7f6cb 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -820,6 +820,8 @@ struct vmbus_requestor { #define VMBUS_RQST_RESET (U64_MAX - 3) struct vmbus_device { + /* preferred ring buffer size in KB, 0 means no preferred size for this device */ + size_t pref_ring_size; u16 dev_type; guid_t guid; bool perf_device; @@ -832,6 +834,7 @@ struct vmbus_gpadl { u32 gpadl_handle; u32 size; void *buffer; + bool decrypted; }; struct vmbus_channel { diff --git a/include/linux/i2c-mux.h b/include/linux/i2c-mux.h index 98ef73b7c8fd..1784ac7afb11 100644 --- a/include/linux/i2c-mux.h +++ b/include/linux/i2c-mux.h @@ -56,8 +56,7 @@ struct i2c_adapter *i2c_root_adapter(struct device *dev); * callback functions to perform hardware-specific mux control. */ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, - u32 force_nr, u32 chan_id, - unsigned int class); + u32 force_nr, u32 chan_id); void i2c_mux_del_adapters(struct i2c_mux_core *muxc); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 652ecb7abeda..5e6cd43a6dbd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -24,8 +24,8 @@ #include <uapi/linux/i2c.h> extern const struct bus_type i2c_bus_type; -extern struct device_type i2c_adapter_type; -extern struct device_type i2c_client_type; +extern const struct device_type i2c_adapter_type; +extern const struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ @@ -931,7 +931,7 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap) static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) { - return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0); + return (msg->addr << 1) | (msg->flags & I2C_M_RD); } u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 83c4d060a559..de2dce743ee2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -9,7 +9,7 @@ * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH - * Copyright (c) 2018 - 2023 Intel Corporation + * Copyright (c) 2018 - 2024 Intel Corporation */ #ifndef LINUX_IEEE80211_H @@ -191,6 +191,11 @@ static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } +static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) +{ + return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); +} + static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; @@ -808,6 +813,11 @@ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); } +static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr) +{ + return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ); +} + struct ieee80211s_hdr { u8 flags; u8 ttl; @@ -1277,6 +1287,24 @@ struct ieee80211_ttlm_elem { u8 optional[]; } __packed; +/** + * struct ieee80211_bss_load_elem - BSS Load element + * + * Defined in section 9.4.2.26 in IEEE 802.11-REVme D4.1 + * + * @sta_count: total number of STAs currently associated with the AP.
+ * @channel_util: Percentage of time that the access point sensed the channel + * was busy. This value is in range [0, 255], the highest value means + * 100% busy. + * @avail_admission_capa: remaining amount of medium time used for admission + * control. + */ +struct ieee80211_bss_load_elem { + __le16 sta_count; + u8 channel_util; + __le16 avail_admission_capa; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -1454,6 +1482,20 @@ struct ieee80211_mgmt { u8 max_tod_error; u8 max_toa_error; } __packed wnm_timing_msr; + struct { + u8 action_code; + u8 dialog_token; + u8 variable[]; + } __packed ttlm_req; + struct { + u8 action_code; + u8 dialog_token; + u8 status_code; + u8 variable[]; + } __packed ttlm_res; + struct { + u8 action_code; + } __packed ttlm_tear_down; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ @@ -2718,9 +2760,11 @@ static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 -#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 -#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 -#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 +#define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 +#define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 +#define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 +#define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 4 /** * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field @@ -3036,6 +3080,9 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 +#define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 @@ -3175,6 +3222,22 @@ ieee80211_eht_oper_size_ok(const u8 *data, u8 len) return len >= needed; } +/* must validate ieee80211_eht_oper_size_ok() first */ +static inline u16 +ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) +{ + const struct ieee80211_eht_operation_info *info = + (const void *)eht_oper->optional; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) + return 0; + + if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) + return 0; + + return get_unaligned_le16(info->optional); +} + #define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) struct ieee80211_bandwidth_indication { @@ -3357,6 +3420,8 @@ enum ieee80211_statuscode { WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126, WLAN_STATUS_SAE_PK = 127, + WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, + WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, }; @@ -3682,6 +3747,7 @@ enum ieee80211_category { WLAN_CATEGORY_UNPROT_DMG = 20, WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_S1G = 22, + WLAN_CATEGORY_PROTECTED_EHT = 37, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; @@ -3745,6 +3811,13 @@ enum ieee80211_unprotected_wnm_actioncode { WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, }; +/* Protected EHT action codes */ +enum ieee80211_protected_eht_actioncode { + WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, + WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, + WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, +}; + /* Security key length */ enum ieee80211_key_len { 
WLAN_KEY_LEN_WEP40 = 5, @@ -4845,6 +4918,10 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f #define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2 +#define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 @@ -4908,18 +4985,43 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) } /** + * ieee80211_mle_get_link_id - returns the link ID + * @data: the basic multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the BSS link ID can't be found, -1 will be returned + */ +static inline int ieee80211_mle_get_link_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* common points now at the beginning of ieee80211_mle_basic_common_info */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) + return -1; + + return *common; +} + +/** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count - * @mle: the basic multi link element + * @data: pointer to the basic multi link element * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). * * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), 0 will be returned. + * for it is clear), -1 will be returned. */ -static inline u8 -ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) +static inline int +ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { + const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; @@ -4927,7 +5029,7 @@ ieee80211_mle_get_bss_param_ch_cnt(const struct ieee80211_multi_link_elem *mle) common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) - return 0; + return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; @@ -4997,6 +5099,81 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) } /** + * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. + * @data: pointer to the multi link EHT IE + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD capabilities and operations field is not present, 0 will be + * returned. 
+ */ +static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + + return get_unaligned_le16(common); +} + +/** + * ieee80211_mle_get_mld_id - returns the MLD ID + * @data: pointer to the multi link element + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + * + * If the MLD ID is not present, 0 will be returned. + */ +static inline u8 ieee80211_mle_get_mld_id(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + + return *common; +} + +/** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element @@ -5009,7 +5186,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) bool check_common_len = false; u16 control; - if (len < fixed) + if (!data || len < fixed) return false; control = le16_to_cpu(mle->control); @@ -5145,7 +5322,7 @@ static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data, info_len += 1; return prof->sta_info_len >= info_len && - fixed + prof->sta_info_len <= len; + fixed + prof->sta_info_len - 1 <= len; } /** diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 2a7660843444..043d442994b0 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -27,44 +27,54 @@ struct tun_xdp_hdr { #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) struct socket *tun_get_socket(struct file *); struct ptr_ring *tun_get_tx_ring(struct file *file); + static inline bool tun_is_xdp_frame(void *ptr) { - return (unsigned long)ptr & TUN_XDP_FLAG; + return (unsigned long)ptr & TUN_XDP_FLAG; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { - return (void *)((unsigned long)xdp | TUN_XDP_FLAG); + return (void *)((unsigned long)xdp | TUN_XDP_FLAG); } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { - return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); } + void tun_ptr_free(void *ptr); #else #include <linux/err.h> #include <linux/errno.h> struct file; struct socket; + static inline struct socket *tun_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } + static inline struct ptr_ring 
*tun_get_tx_ring(struct file *f) { return ERR_PTR(-EINVAL); } + static inline bool tun_is_xdp_frame(void *ptr) { return false; } + static inline void *tun_xdp_to_ptr(struct xdp_frame *xdp) { return NULL; } + static inline struct xdp_frame *tun_ptr_to_xdp(void *ptr) { return NULL; } + static inline void tun_ptr_free(void *ptr) { } diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 719cf9cc6e1a..383614ebd760 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -48,6 +48,7 @@ struct iio_dev; * be used. * @irq_flags: flags for the interrupt used by the triggered buffer * @num_slots: Number of sequencer slots + * @irq_line: IRQ for reading conversions. If 0, spi->irq will be used */ struct ad_sigma_delta_info { int (*set_channel)(struct ad_sigma_delta *, unsigned int channel); @@ -62,6 +63,7 @@ struct ad_sigma_delta_info { unsigned int data_reg; unsigned long irq_flags; unsigned int num_slots; + int irq_line; }; /** @@ -89,6 +91,7 @@ struct ad_sigma_delta { unsigned int active_slots; unsigned int current_slot; unsigned int num_slots; + int irq_line; bool status_appended; /* map slots to channels in order to know what to expect from devices */ unsigned int *slots; diff --git a/include/linux/iio/adc/adi-axi-adc.h b/include/linux/iio/adc/adi-axi-adc.h deleted file mode 100644 index b7904992d561..000000000000 --- a/include/linux/iio/adc/adi-axi-adc.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Analog Devices Generic AXI ADC IP core driver/library - * Link: https://wiki.analog.com/resources/fpga/docs/axi_adc_ip - * - * Copyright 2012-2020 Analog Devices Inc. - */ -#ifndef __ADI_AXI_ADC_H__ -#define __ADI_AXI_ADC_H__ - -struct device; -struct iio_chan_spec; - -/** - * struct adi_axi_adc_chip_info - Chip specific information - * @name Chip name - * @id Chip ID (usually product ID) - * @channels Channel specifications of type @struct iio_chan_spec - * @num_channels Number of @channels - * @scale_table Supported scales by the chip; tuples of 2 ints - * @num_scales Number of scales in the table - * @max_rate Maximum sampling rate supported by the device - */ -struct adi_axi_adc_chip_info { - const char *name; - unsigned int id; - - const struct iio_chan_spec *channels; - unsigned int num_channels; - - const unsigned int (*scale_table)[2]; - int num_scales; - - unsigned long max_rate; -}; - -/** - * struct adi_axi_adc_conv - data of the ADC attached to the AXI ADC - * @chip_info chip info details for the client ADC - * @preenable_setup op to run in the client before enabling the AXI ADC - * @reg_access IIO debugfs_reg_access hook for the client ADC - * @read_raw IIO read_raw hook for the client ADC - * @write_raw IIO write_raw hook for the client ADC - * @read_avail IIO read_avail hook for the client ADC - */ -struct adi_axi_adc_conv { - const struct adi_axi_adc_chip_info *chip_info; - - int (*preenable_setup)(struct adi_axi_adc_conv *conv); - int (*reg_access)(struct adi_axi_adc_conv *conv, unsigned int reg, - unsigned int writeval, unsigned int *readval); - int (*read_raw)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - int *val, int *val2, long mask); - int (*write_raw)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - int val, int val2, long mask); - int (*read_avail)(struct adi_axi_adc_conv *conv, - struct iio_chan_spec const *chan, - const int **val, int *type, int *length, long mask); -}; - -struct adi_axi_adc_conv 
*devm_adi_axi_adc_conv_register(struct device *dev,
-						size_t sizeof_priv);
-
-void *adi_axi_adc_conv_priv(struct adi_axi_adc_conv *conv);
-
-#endif
diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
new file mode 100644
index 000000000000..8099759d7242
--- /dev/null
+++ b/include/linux/iio/backend.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _IIO_BACKEND_H_
+#define _IIO_BACKEND_H_
+
+#include <linux/types.h>
+
+struct iio_chan_spec;
+struct fwnode_handle;
+struct iio_backend;
+struct device;
+struct iio_dev;
+
+enum iio_backend_data_type {
+	IIO_BACKEND_TWOS_COMPLEMENT,
+	IIO_BACKEND_OFFSET_BINARY,
+	IIO_BACKEND_DATA_TYPE_MAX
+};
+
+enum iio_backend_data_source {
+	IIO_BACKEND_INTERNAL_CONTINUOS_WAVE,
+	IIO_BACKEND_EXTERNAL,
+	IIO_BACKEND_DATA_SOURCE_MAX
+};
+
+/**
+ * IIO_BACKEND_EX_INFO - Helper for an IIO extended channel attribute
+ * @_name: Attribute name
+ * @_shared: Whether the attribute is shared between all channels
+ * @_what: Data private to the driver
+ */
+#define IIO_BACKEND_EX_INFO(_name, _shared, _what) {	\
+	.name = (_name),				\
+	.shared = (_shared),				\
+	.read = iio_backend_ext_info_get,		\
+	.write = iio_backend_ext_info_set,		\
+	.private = (_what),				\
+}
+
+/**
+ * struct iio_backend_data_fmt - Backend data format
+ * @type: Data type.
+ * @sign_extend: Bool to tell if the data is sign extended.
+ * @enable: Enable/Disable the data format module. If disabled,
+ *	    no formatting will happen.
+ */
+struct iio_backend_data_fmt {
+	enum iio_backend_data_type type;
+	bool sign_extend;
+	bool enable;
+};
+
+/* vendor specific from 32 */
+enum iio_backend_test_pattern {
+	IIO_BACKEND_NO_TEST_PATTERN,
+	/* modified prbs9 */
+	IIO_BACKEND_ADI_PRBS_9A = 32,
+	IIO_BACKEND_TEST_PATTERN_MAX
+};
+
+enum iio_backend_sample_trigger {
+	IIO_BACKEND_SAMPLE_TRIGGER_EDGE_FALLING,
+	IIO_BACKEND_SAMPLE_TRIGGER_EDGE_RISING,
+	IIO_BACKEND_SAMPLE_TRIGGER_MAX
+};
+
+/**
+ * struct iio_backend_ops - operations structure for an iio_backend
+ * @enable: Enable backend.
+ * @disable: Disable backend.
+ * @chan_enable: Enable one channel.
+ * @chan_disable: Disable one channel.
+ * @data_format_set: Configure the data format for a specific channel.
+ * @data_source_set: Configure the data source for a specific channel.
+ * @set_sample_rate: Configure the sampling rate for a specific channel.
+ * @test_pattern_set: Configure a test pattern.
+ * @chan_status: Get the channel status.
+ * @iodelay_set: Set digital I/O delay.
+ * @data_sample_trigger: Control when to sample data.
+ * @request_buffer: Request an IIO buffer.
+ * @free_buffer: Free an IIO buffer.
+ * @extend_chan_spec: Extend an IIO channel.
+ * @ext_info_set: Extended info setter.
+ * @ext_info_get: Extended info getter.
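A provider-side sketch (hypothetical "foo" platform driver, not part of the diff), wiring a minimal set of these ops into devm_iio_backend_register(), which is declared further down in this header:

	struct foo_state {
		struct clk *clk;	/* hypothetical driver state */
	};

	static int foo_enable(struct iio_backend *back)
	{
		/* e.g. ungate clocks, take the core out of reset */
		return 0;
	}

	static const struct iio_backend_ops foo_backend_ops = {
		.enable = foo_enable,
	};

	static int foo_probe(struct platform_device *pdev)
	{
		struct foo_state *st;

		st = devm_kzalloc(&pdev->dev, sizeof(*st), GFP_KERNEL);
		if (!st)
			return -ENOMEM;
		return devm_iio_backend_register(&pdev->dev, &foo_backend_ops, st);
	}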
+ **/ +struct iio_backend_ops { + int (*enable)(struct iio_backend *back); + void (*disable)(struct iio_backend *back); + int (*chan_enable)(struct iio_backend *back, unsigned int chan); + int (*chan_disable)(struct iio_backend *back, unsigned int chan); + int (*data_format_set)(struct iio_backend *back, unsigned int chan, + const struct iio_backend_data_fmt *data); + int (*data_source_set)(struct iio_backend *back, unsigned int chan, + enum iio_backend_data_source data); + int (*set_sample_rate)(struct iio_backend *back, unsigned int chan, + u64 sample_rate_hz); + int (*test_pattern_set)(struct iio_backend *back, + unsigned int chan, + enum iio_backend_test_pattern pattern); + int (*chan_status)(struct iio_backend *back, unsigned int chan, + bool *error); + int (*iodelay_set)(struct iio_backend *back, unsigned int chan, + unsigned int taps); + int (*data_sample_trigger)(struct iio_backend *back, + enum iio_backend_sample_trigger trigger); + struct iio_buffer *(*request_buffer)(struct iio_backend *back, + struct iio_dev *indio_dev); + void (*free_buffer)(struct iio_backend *back, + struct iio_buffer *buffer); + int (*extend_chan_spec)(struct iio_backend *back, + struct iio_chan_spec *chan); + int (*ext_info_set)(struct iio_backend *back, uintptr_t private, + const struct iio_chan_spec *chan, + const char *buf, size_t len); + int (*ext_info_get)(struct iio_backend *back, uintptr_t private, + const struct iio_chan_spec *chan, char *buf); +}; + +int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan); +int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan); +int devm_iio_backend_enable(struct device *dev, struct iio_backend *back); +int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, + const struct iio_backend_data_fmt *data); +int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan, + enum iio_backend_data_source data); +int iio_backend_set_sampling_freq(struct iio_backend *back, unsigned int chan, + u64 sample_rate_hz); +int iio_backend_test_pattern_set(struct iio_backend *back, + unsigned int chan, + enum iio_backend_test_pattern pattern); +int iio_backend_chan_status(struct iio_backend *back, unsigned int chan, + bool *error); +int iio_backend_iodelay_set(struct iio_backend *back, unsigned int lane, + unsigned int taps); +int iio_backend_data_sample_trigger(struct iio_backend *back, + enum iio_backend_sample_trigger trigger); +int devm_iio_backend_request_buffer(struct device *dev, + struct iio_backend *back, + struct iio_dev *indio_dev); +ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, + const struct iio_chan_spec *chan, + const char *buf, size_t len); +ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, + const struct iio_chan_spec *chan, char *buf); + +int iio_backend_extend_chan_spec(struct iio_dev *indio_dev, + struct iio_backend *back, + struct iio_chan_spec *chan); +void *iio_backend_get_priv(const struct iio_backend *conv); +struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name); +struct iio_backend * +__devm_iio_backend_get_from_fwnode_lookup(struct device *dev, + struct fwnode_handle *fwnode); + +int devm_iio_backend_register(struct device *dev, + const struct iio_backend_ops *ops, void *priv); + +#endif diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 18d3702fa95d..6e27e47077d5 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -132,7 +132,9 @@ int 
iio_dma_buffer_disable(struct iio_buffer *buffer, struct iio_dev *indio_dev); int iio_dma_buffer_read(struct iio_buffer *buffer, size_t n, char __user *user_buffer); -size_t iio_dma_buffer_data_available(struct iio_buffer *buffer); +int iio_dma_buffer_write(struct iio_buffer *buffer, size_t n, + const char __user *user_buffer); +size_t iio_dma_buffer_usage(struct iio_buffer *buffer); int iio_dma_buffer_set_bytes_per_datum(struct iio_buffer *buffer, size_t bpd); int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length); int iio_dma_buffer_request_update(struct iio_buffer *buffer); diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 5c355be89814..81d9a19aeb91 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -7,11 +7,28 @@ #ifndef __IIO_DMAENGINE_H__ #define __IIO_DMAENGINE_H__ +#include <linux/iio/buffer.h> + struct iio_dev; struct device; -int devm_iio_dmaengine_buffer_setup(struct device *dev, - struct iio_dev *indio_dev, - const char *channel); +void iio_dmaengine_buffer_free(struct iio_buffer *buffer); +struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, + struct iio_dev *indio_dev, + const char *channel, + enum iio_buffer_direction dir); + +#define iio_dmaengine_buffer_setup(dev, indio_dev, channel) \ + iio_dmaengine_buffer_setup_ext(dev, indio_dev, channel, \ + IIO_BUFFER_DIRECTION_IN) + +int devm_iio_dmaengine_buffer_setup_ext(struct device *dev, + struct iio_dev *indio_dev, + const char *channel, + enum iio_buffer_direction dir); + +#define devm_iio_dmaengine_buffer_setup(dev, indio_dev, channel) \ + devm_iio_dmaengine_buffer_setup_ext(dev, indio_dev, channel, \ + IIO_BUFFER_DIRECTION_IN) #endif diff --git a/include/linux/iio/common/inv_sensors_timestamp.h b/include/linux/iio/common/inv_sensors_timestamp.h index a47d304d1ba7..8d506f1e9df2 100644 --- a/include/linux/iio/common/inv_sensors_timestamp.h +++ b/include/linux/iio/common/inv_sensors_timestamp.h @@ -71,8 +71,7 @@ int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, uint32_t period, bool fifo); void inv_sensors_timestamp_interrupt(struct inv_sensors_timestamp *ts, - uint32_t fifo_period, size_t fifo_nb, - size_t sensor_nb, int64_t timestamp); + size_t sample_nb, int64_t timestamp); static inline int64_t inv_sensors_timestamp_pop(struct inv_sensors_timestamp *ts) { diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index c5b36d2c1e73..55e2b22086a1 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/cdev.h> +#include <linux/cleanup.h> #include <linux/slab.h> #include <linux/iio/types.h> /* IIO TODO LIST */ @@ -638,10 +639,37 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); void iio_device_release_direct_mode(struct iio_dev *indio_dev); + +/* + * This autocleanup logic is normally used via + * iio_device_claim_direct_scoped(). + */ +DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), + iio_device_release_direct_mode(_T)) + +DEFINE_GUARD_COND(iio_claim_direct, _try, ({ + struct iio_dev *dev; + int d = iio_device_claim_direct_mode(_T); + + if (d < 0) + dev = NULL; + else + dev = _T; + dev; + })) + +/** + * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. 
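A consumer sketch for the macro defined just below (hypothetical read_raw() callback, not from the diff): the statement block only runs when direct mode was claimed, the @fail expression runs otherwise, and the claim is released when the block is left:

	static int foo_read_raw(struct iio_dev *indio_dev,
				struct iio_chan_spec const *chan,
				int *val, int *val2, long mask)
	{
		iio_device_claim_direct_scoped(return -EBUSY, indio_dev) {
			*val = 42;	/* device access would go here */
			return IIO_VAL_INT;
		}
		unreachable();	/* every path in the block returns */
	}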
+ * @fail: What to do on failure to claim device. + * @iio_dev: Pointer to the IIO devices structure + */ +#define iio_device_claim_direct_scoped(fail, iio_dev) \ + scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) + int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); -extern struct bus_type iio_bus_type; +extern const struct bus_type iio_bus_type; /** * iio_device_put() - reference counted deallocation of struct device @@ -760,6 +788,19 @@ static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev) } #endif +#ifdef CONFIG_ACPI +bool iio_read_acpi_mount_matrix(struct device *dev, + struct iio_mount_matrix *orientation, + char *acpi_method); +#else +static inline bool iio_read_acpi_mount_matrix(struct device *dev, + struct iio_mount_matrix *orientation, + char *acpi_method) +{ + return false; +} +#endif + ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, diff --git a/include/linux/ima.h b/include/linux/ima.h index 86b57757c7b1..0bae61a15b60 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -16,23 +16,6 @@ struct linux_binprm; #ifdef CONFIG_IMA extern enum hash_algo ima_get_current_hash_algo(void); -extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); -extern void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode); -extern void ima_file_free(struct file *file); -extern int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); -extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); -extern int ima_load_data(enum kernel_load_data_id id, bool contents); -extern int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, char *description); -extern int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents); -extern int ima_post_read_file(struct file *file, void *buf, loff_t size, - enum kernel_read_file_id id); -extern void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry); extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); @@ -57,68 +40,6 @@ static inline enum hash_algo ima_get_current_hash_algo(void) return HASH_ALGO__LAST; } -static inline int ima_bprm_check(struct linux_binprm *bprm) -{ - return 0; -} - -static inline int ima_file_check(struct file *file, int mask) -{ - return 0; -} - -static inline void ima_post_create_tmpfile(struct mnt_idmap *idmap, - struct inode *inode) -{ -} - -static inline void ima_file_free(struct file *file) -{ - return; -} - -static inline int ima_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) -{ - return 0; -} - -static inline int ima_file_mprotect(struct vm_area_struct *vma, - unsigned long prot) -{ - return 0; -} - -static inline int ima_load_data(enum kernel_load_data_id id, bool contents) -{ - return 0; -} - -static inline int ima_post_load_data(char *buf, loff_t size, - enum kernel_load_data_id id, - char *description) -{ - return 0; -} - -static inline int ima_read_file(struct file *file, enum kernel_read_file_id id, - bool contents) -{ - return 0; -} - -static inline int ima_post_read_file(struct file *file, void *buf, loff_t size, 
- enum kernel_read_file_id id) -{ - return 0; -} - -static inline void ima_post_path_mknod(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - static inline int ima_file_hash(struct file *file, char *buf, size_t buf_size) { return -EOPNOTSUPP; @@ -169,76 +90,13 @@ static inline void ima_add_kexec_buffer(struct kimage *image) {} #endif -#ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS -extern void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, size_t plen, - unsigned long flags, bool create); -#else -static inline void ima_post_key_create_or_update(struct key *keyring, - struct key *key, - const void *payload, - size_t plen, - unsigned long flags, - bool create) {} -#endif /* CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS */ - #ifdef CONFIG_IMA_APPRAISE extern bool is_ima_appraise_enabled(void); -extern void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry); -extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, - const void *xattr_value, size_t xattr_value_len); -extern int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl); -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return ima_inode_set_acl(idmap, dentry, acl_name, NULL); -} -extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else static inline bool is_ima_appraise_enabled(void) { return 0; } - -static inline void ima_inode_post_setattr(struct mnt_idmap *idmap, - struct dentry *dentry) -{ - return; -} - -static inline int ima_inode_setxattr(struct dentry *dentry, - const char *xattr_name, - const void *xattr_value, - size_t xattr_value_len) -{ - return 0; -} - -static inline int ima_inode_set_acl(struct mnt_idmap *idmap, - struct dentry *dentry, const char *acl_name, - struct posix_acl *kacl) -{ - - return 0; -} - -static inline int ima_inode_removexattr(struct dentry *dentry, - const char *xattr_name) -{ - return 0; -} - -static inline int ima_inode_remove_acl(struct mnt_idmap *idmap, - struct dentry *dentry, - const char *acl_name) -{ - return 0; -} #endif /* CONFIG_IMA_APPRAISE */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index adb83a42a6b9..35227d47cfc9 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -2,7 +2,7 @@ #ifndef _LINUX_INDIRECT_CALL_WRAPPER_H #define _LINUX_INDIRECT_CALL_WRAPPER_H -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE /* * INDIRECT_CALL_$NR - wrapper for indirect calls with $NR known builtin diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 84abb30a3fbb..a9033696b0aa 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -8,6 +8,7 @@ struct inet_hashinfo; struct inet_diag_handler { + struct module *owner; void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r); diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ddb27fc0ee8c..cb5280e6cc21 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -53,13 +53,15 @@ struct in_device { }; #define IPV4_DEVCONF(cnf, attr) ((cnf).data[IPV4_DEVCONF_ ## attr - 1]) +#define IPV4_DEVCONF_RO(cnf, attr) READ_ONCE(IPV4_DEVCONF(cnf, attr)) #define IPV4_DEVCONF_ALL(net, attr) \ IPV4_DEVCONF((*(net)->ipv4.devconf_all), 
attr) +#define IPV4_DEVCONF_ALL_RO(net, attr) READ_ONCE(IPV4_DEVCONF_ALL(net, attr)) -static inline int ipv4_devconf_get(struct in_device *in_dev, int index) +static inline int ipv4_devconf_get(const struct in_device *in_dev, int index) { index--; - return in_dev->cnf.data[index]; + return READ_ONCE(in_dev->cnf.data[index]); } static inline void ipv4_devconf_set(struct in_device *in_dev, int index, @@ -67,7 +69,7 @@ static inline void ipv4_devconf_set(struct in_device *in_dev, int index, { index--; set_bit(index, in_dev->cnf.state); - in_dev->cnf.data[index] = val; + WRITE_ONCE(in_dev->cnf.data[index], val); } static inline void ipv4_devconf_setall(struct in_device *in_dev) @@ -81,18 +83,18 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ipv4_devconf_set((in_dev), IPV4_DEVCONF_ ## attr, (val)) #define IN_DEV_ANDCONF(in_dev, attr) \ - (IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \ + (IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr) && \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_NET_ORCONF(in_dev, net, attr) \ - (IPV4_DEVCONF_ALL(net, attr) || \ + (IPV4_DEVCONF_ALL_RO(net, attr) || \ IN_DEV_CONF_GET((in_dev), attr)) #define IN_DEV_ORCONF(in_dev, attr) \ IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr) #define IN_DEV_MAXCONF(in_dev, attr) \ - (max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \ + (max(IPV4_DEVCONF_ALL_RO(dev_net(in_dev->dev), attr), \ IN_DEV_CONF_GET((in_dev), attr))) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) diff --git a/include/linux/init.h b/include/linux/init.h index 3fa3f6241350..58cef4c2e59a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -168,12 +168,8 @@ extern initcall_entry_t __initcall_end[]; extern struct file_system_type rootfs_fs_type; -#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) extern bool rodata_enabled; -#endif -#ifdef CONFIG_STRICT_KERNEL_RWX void mark_rodata_ro(void); -#endif extern void (*late_time_init)(void); diff --git a/include/linux/input.h b/include/linux/input.h index de6503c0edb8..c22ac465254b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -514,7 +514,7 @@ void input_enable_softrepeat(struct input_dev *dev, int delay, int period); bool input_device_enabled(struct input_dev *dev); -extern struct class input_class; +extern const struct class input_class; /** * struct ff_device - force-feedback part of an input device diff --git a/include/linux/input/navpoint.h b/include/linux/input/navpoint.h deleted file mode 100644 index 5192ae3f5ec1..000000000000 --- a/include/linux/input/navpoint.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 Paul Parsons <lost.distance@yahoo.com> - */ - -struct navpoint_platform_data { - int port; /* PXA SSP port for pxa_ssp_request() */ -}; diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index 1b608e00290a..711a1f0d1a73 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -148,6 +148,41 @@ instrument_copy_from_user_after(const void *to, const void __user *from, } /** + * instrument_memcpy_before - add instrumentation before non-instrumented memcpy + * @to: destination address + * @from: source address + * @n: number of bytes to copy + * + * Instrument memory accesses that happen in custom memcpy implementations. The + * instrumentation should be inserted before the memcpy call. 
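The intended call pattern, sketched around a stand-in copy loop (hypothetical routine, not from the diff; @left is 0 since this copy cannot partially fail, and instrument_memcpy_after() is defined right below):

	static void *my_arch_memcpy(void *to, const void *from, unsigned long n)
	{
		char *d = to;
		const char *s = from;
		unsigned long i;

		instrument_memcpy_before(to, from, n);	 /* KASAN/KCSAN checks */
		for (i = 0; i < n; i++)			 /* raw, uninstrumented copy */
			d[i] = s[i];
		instrument_memcpy_after(to, from, n, 0); /* KMSAN shadow update */
		return to;
	}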
+ */ +static __always_inline void instrument_memcpy_before(void *to, const void *from, + unsigned long n) +{ + kasan_check_write(to, n); + kasan_check_read(from, n); + kcsan_check_write(to, n); + kcsan_check_read(from, n); +} + +/** + * instrument_memcpy_after - add instrumentation after non-instrumented memcpy + * @to: destination address + * @from: source address + * @n: number of bytes to copy + * @left: number of bytes not copied (if known) + * + * Instrument memory accesses that happen in custom memcpy implementations. The + * instrumentation should be inserted after the memcpy call. + */ +static __always_inline void instrument_memcpy_after(void *to, const void *from, + unsigned long n, + unsigned long left) +{ + kmsan_memmove(to, from, n - left); +} + +/** * instrument_get_user() - add instrumentation to get_user()-like macros * @to: destination variable, may not be address-taken * diff --git a/include/linux/integrity.h b/include/linux/integrity.h index 2ea0f2f65ab6..f5842372359b 100644 --- a/include/linux/integrity.h +++ b/include/linux/integrity.h @@ -8,6 +8,7 @@ #define _LINUX_INTEGRITY_H #include <linux/fs.h> +#include <linux/iversion.h> enum integrity_status { INTEGRITY_PASS = 0, @@ -19,40 +20,46 @@ enum integrity_status { INTEGRITY_UNKNOWN, }; -/* List of EVM protected security xattrs */ #ifdef CONFIG_INTEGRITY -extern struct integrity_iint_cache *integrity_inode_get(struct inode *inode); -extern void integrity_inode_free(struct inode *inode); extern void __init integrity_load_keys(void); #else -static inline struct integrity_iint_cache * - integrity_inode_get(struct inode *inode) -{ - return NULL; -} - -static inline void integrity_inode_free(struct inode *inode) -{ - return; -} - static inline void integrity_load_keys(void) { } #endif /* CONFIG_INTEGRITY */ -#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS - -extern int integrity_kernel_module_request(char *kmod_name); +/* An inode's attributes for detection of changes */ +struct integrity_inode_attributes { + u64 version; /* track inode changes */ + unsigned long ino; + dev_t dev; +}; -#else +/* + * On stacked filesystems the i_version alone is not enough to detect file data + * or metadata change. Additional metadata is required. + */ +static inline void +integrity_inode_attrs_store(struct integrity_inode_attributes *attrs, + u64 i_version, const struct inode *inode) +{ + attrs->version = i_version; + attrs->dev = inode->i_sb->s_dev; + attrs->ino = inode->i_ino; +} -static inline int integrity_kernel_module_request(char *kmod_name) +/* + * On stacked filesystems detect whether the inode or its content has changed. + */ +static inline bool +integrity_inode_attrs_changed(const struct integrity_inode_attributes *attrs, + const struct inode *inode) { - return 0; + return (inode->i_sb->s_dev != attrs->dev || + inode->i_ino != attrs->ino || + !inode_eq_iversion(inode, attrs->version)); } -#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ #endif /* _LINUX_INTEGRITY_H */ diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 33f21bd85dbf..c0397423d3a8 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -158,6 +158,26 @@ struct rapl_if_priv { void *rpi; }; +#ifdef CONFIG_PERF_EVENTS +/** + * struct rapl_package_pmu_data: Per package data for PMU support + * @scale: Scale of 2^-32 Joules for each energy counter increase. + * @lock: Lock to protect n_active and active_list. + * @n_active: Number of active events. + * @active_list: List of active events. 
+ * @timer_interval: Maximum timer expiration time before counter overflow. + * @hrtimer: Periodically update the counter to prevent overflow. + */ +struct rapl_package_pmu_data { + u64 scale[RAPL_DOMAIN_MAX]; + raw_spinlock_t lock; + int n_active; + struct list_head active_list; + ktime_t timer_interval; + struct hrtimer hrtimer; +}; +#endif + /* maximum rapl package domain name: package-%d-die-%d */ #define PACKAGE_DOMAIN_NAME_LENGTH 30 @@ -176,10 +196,28 @@ struct rapl_package { struct cpumask cpumask; char name[PACKAGE_DOMAIN_NAME_LENGTH]; struct rapl_if_priv *priv; +#ifdef CONFIG_PERF_EVENTS + bool has_pmu; + struct rapl_package_pmu_data pmu_data; +#endif }; +struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +void rapl_remove_package_cpuslocked(struct rapl_package *rp); + struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu); struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu); void rapl_remove_package(struct rapl_package *rp); +#ifdef CONFIG_PERF_EVENTS +int rapl_package_add_pmu(struct rapl_package *rp); +void rapl_package_remove_pmu(struct rapl_package *rp); +#else +static inline int rapl_package_add_pmu(struct rapl_package *rp) { return 0; } +static inline void rapl_package_remove_pmu(struct rapl_package *rp) { } +#endif + #endif /* __INTEL_RAPL_H__ */ diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h index f422612c28d6..8ff8eabb4a98 100644 --- a/include/linux/intel_tcc.h +++ b/include/linux/intel_tcc.h @@ -13,6 +13,6 @@ int intel_tcc_get_tjmax(int cpu); int intel_tcc_get_offset(int cpu); int intel_tcc_set_offset(int cpu, int offset); -int intel_tcc_get_temp(int cpu, bool pkg); +int intel_tcc_get_temp(int cpu, int *temp, bool pkg); #endif /* __INTEL_TCC_H__ */ diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index a3529b962be6..1e880cb0f454 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -27,16 +27,22 @@ enum intel_tpmi_id { /** * struct intel_tpmi_plat_info - Platform information for a TPMI device instance - * @package_id: CPU Package id - * @bus_number: PCI bus number - * @device_number: PCI device number + * @cdie_mask: Mask of all compute dies in the partition + * @package_id: CPU Package id + * @partition: Package partition id when multiple VSEC PCI devices per package + * @segment: PCI segment ID + * @bus_number: PCI bus number + * @device_number: PCI device number * @function_number: PCI function number * * Structure to store platform data for a TPMI device instance. This * struct is used to return data via tpmi_get_platform_data(). 
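For the RAPL PMU hooks above, a sketch of the expected call order (hypothetical caller, not from the diff; when CONFIG_PERF_EVENTS is off the stub turns the PMU step into a no-op):

	static int example_register_rapl_pmu(struct rapl_if_priv *priv)
	{
		/* id is interpreted as a CPU number because id_is_cpu is true */
		struct rapl_package *rp = rapl_add_package(0, priv, true);

		if (IS_ERR(rp))
			return PTR_ERR(rp);
		return rapl_package_add_pmu(rp);	/* 0 on success */
	}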
*/
struct intel_tpmi_plat_info {
+	u16 cdie_mask;
	u8 package_id;
+	u8 partition;
+	u8 segment;
	u8 bus_number;
	u8 device_number;
	u8 function_number;
};
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index 7ba183f221f1..f5aef8784692 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -36,7 +36,7 @@ struct icc_onecell_data {
	struct icc_node *nodes[] __counted_by(num_nodes);
};

-struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec,
+struct icc_node *of_icc_xlate_onecell(const struct of_phandle_args *spec,
				      void *data);

/**
@@ -65,8 +65,9 @@ struct icc_provider {
			    u32 peak_bw, u32 *agg_avg, u32 *agg_peak);
	void (*pre_aggregate)(struct icc_node *node);
	int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak);
-	struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data);
-	struct icc_node_data* (*xlate_extended)(struct of_phandle_args *spec, void *data);
+	struct icc_node* (*xlate)(const struct of_phandle_args *spec, void *data);
+	struct icc_node_data* (*xlate_extended)(const struct of_phandle_args *spec,
+						void *data);
	struct device *dev;
	int users;
	bool inter_set;
@@ -124,7 +125,7 @@ int icc_nodes_remove(struct icc_provider *provider);
void icc_provider_init(struct icc_provider *provider);
int icc_provider_register(struct icc_provider *provider);
void icc_provider_deregister(struct icc_provider *provider);
-struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec);
+struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec);
void icc_sync_state(struct device *dev);
#else
@@ -171,7 +172,7 @@ static inline int icc_provider_register(struct icc_provider *provider)
static inline void icc_provider_deregister(struct icc_provider *provider)
{
}
-static inline struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec)
+static inline struct icc_node_data *of_icc_get_from_provider(const struct of_phandle_args *spec)
{
	return ERR_PTR(-ENOTSUPP);
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 76121c2bb4f8..5c9bdd3ffccc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -67,6 +67,8 @@
 *	later.
 * IRQF_NO_DEBUG - Exclude from runaway detection for IPI and similar handlers,
 *		   depends on IRQF_PERCPU.
+ * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared
+ *		       interrupt.
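A consumer sketch for the new flag (hypothetical handler and device name, not from the diff): the shared request can now succeed even when the line's first user registered with IRQF_ONESHOT:

	static irqreturn_t foo_irq_handler(int irq, void *dev_id)
	{
		return IRQ_HANDLED;
	}

	static int foo_request_irq(int irq, void *dev_id)
	{
		return request_irq(irq, foo_irq_handler,
				   IRQF_SHARED | IRQF_COND_ONESHOT,
				   "foo", dev_id);
	}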
*/ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +84,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/include/linux/io.h b/include/linux/io.h index 7304f2a69960..59ec5eea696c 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -6,6 +6,7 @@ #ifndef _LINUX_IO_H #define _LINUX_IO_H +#include <linux/sizes.h> #include <linux/types.h> #include <linux/init.h> #include <linux/bug.h> @@ -16,19 +17,32 @@ struct device; struct resource; -__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); +#ifndef __iowrite32_copy +void __iowrite32_copy(void __iomem *to, const void *from, size_t count); +#endif + void __ioread32_copy(void *to, const void __iomem *from, size_t count); + +#ifndef __iowrite64_copy void __iowrite64_copy(void __iomem *to, const void *from, size_t count); +#endif #ifdef CONFIG_MMU int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot); +int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot); #else static inline int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { return 0; } +static inline int vmap_page_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) +{ + return 0; +} #endif /* diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 68ed6697fece..e123d5e17b52 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -11,7 +11,6 @@ void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); void io_uring_unreg_ringfd(void); const char *io_uring_get_opcode(u8 opcode); -int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); bool io_is_uring_fops(struct file *file); static inline void io_uring_files_cancel(void) @@ -45,11 +44,6 @@ static inline const char *io_uring_get_opcode(u8 opcode) { return ""; } -static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, - unsigned int issue_flags) -{ - return -EOPNOTSUPP; -} static inline bool io_is_uring_fops(struct file *file) { return false; diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index e453a997c060..447fbfd32215 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -26,12 +26,25 @@ static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); + +/* + * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd + * and the corresponding io_uring request. + * + * Note: the caller should never hard code @issue_flags and is only allowed + * to pass the mask provided by the core io_uring code. + */ void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, unsigned issue_flags); + void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned), unsigned flags); +/* + * Note: the caller should never hard code @issue_flags and only use the + * mask provided by the core io_uring code. 
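A driver-side sketch of the completion contract for io_uring_cmd_done() (hypothetical ->uring_cmd() handler; completing inline and then returning -EIOCBQUEUED mirrors existing in-tree users):

	static int foo_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
	{
		int ret = 0;	/* driver-specific work would go here */

		/* post the CQE; issue_flags must be forwarded unmodified */
		io_uring_cmd_done(ioucmd, ret, 0, issue_flags);
		return -EIOCBQUEUED;	/* completion already posted */
	}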
+ */ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, unsigned int issue_flags); @@ -56,6 +69,17 @@ static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, } #endif +/* + * Polled completions must ensure they are coming from a poll queue, and + * hence are completed inside the usual poll handling loops. + */ +static inline void io_uring_cmd_iopoll_done(struct io_uring_cmd *ioucmd, + ssize_t ret, ssize_t res2) +{ + lockdep_assert(in_task()); + io_uring_cmd_done(ioucmd, ret, res2, 0); +} + /* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */ static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *, unsigned)) diff --git a/include/linux/io_uring/net.h b/include/linux/io_uring/net.h new file mode 100644 index 000000000000..b58f39fed4d5 --- /dev/null +++ b/include/linux/io_uring/net.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IO_URING_NET_H +#define _LINUX_IO_URING_NET_H + +struct io_uring_cmd; + +#if defined(CONFIG_IO_URING) +int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags); + +#else +static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 854ad67a5f70..7a6b190c7da7 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -2,6 +2,7 @@ #define IO_URING_TYPES_H #include <linux/blkdev.h> +#include <linux/hashtable.h> #include <linux/task_work.h> #include <linux/bitmap.h> #include <linux/llist.h> @@ -12,7 +13,7 @@ enum { * A hint to not wake right away but delay until there are enough of * tw's queued to match the number of CQEs the task is waiting for. * - * Must not be used wirh requests generating more than one CQE. + * Must not be used with requests generating more than one CQE. * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set. */ IOU_F_TWQ_LAZY_WAKE = 1, @@ -204,6 +205,7 @@ struct io_submit_state { bool plug_started; bool need_plug; + bool cq_flush; unsigned short submit_nr; unsigned int cqes_count; struct blk_plug plug; @@ -218,7 +220,7 @@ struct io_ev_fd { }; struct io_alloc_cache { - struct io_wq_work_node list; + void **entries; unsigned int nr_cached; unsigned int max_cached; size_t elem_size; @@ -240,12 +242,14 @@ struct io_ring_ctx { unsigned int poll_activated: 1; unsigned int drain_disabled: 1; unsigned int compat: 1; + unsigned int iowq_limits_set : 1; struct task_struct *submitter_task; struct io_rings *rings; struct percpu_ref refs; enum task_work_notify_mode notify_method; + unsigned sq_thread_idle; } ____cacheline_aligned_in_smp; /* submission data */ @@ -274,28 +278,30 @@ struct io_ring_ctx { */ struct io_rsrc_node *rsrc_node; atomic_t cancel_seq; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. 
+ */ + bool poll_multi_queue; + struct io_wq_work_list iopoll_list; + struct io_file_table file_table; + struct io_mapped_ubuf **user_bufs; unsigned nr_user_files; unsigned nr_user_bufs; - struct io_mapped_ubuf **user_bufs; struct io_submit_state submit_state; - struct io_buffer_list *io_bl; struct xarray io_bl_xa; struct io_hash_table cancel_table_locked; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; - - /* - * ->iopoll_list is protected by the ctx->uring_lock for - * io_uring instances that don't use IORING_SETUP_SQPOLL. - * For SQPOLL, only the single threaded io_sq_thread() will - * manipulate the list, hence no extra locking is needed there. - */ - struct io_wq_work_list iopoll_list; - bool poll_multi_queue; + struct io_alloc_cache rw_cache; + struct io_alloc_cache uring_cache; /* * Any cancelable uring_cmd is added to this list in @@ -338,14 +344,8 @@ struct io_ring_ctx { unsigned cq_last_tm_flush; } ____cacheline_aligned_in_smp; - struct io_uring_cqe completion_cqes[16]; - spinlock_t completion_lock; - /* IRQ completion list, under ->completion_lock */ - struct io_wq_work_list locked_free_list; - unsigned int locked_free_nr; - struct list_head io_buffers_comp; struct list_head cq_overflow_list; struct io_hash_table cancel_table; @@ -366,14 +366,8 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; - struct xarray personalities; - u32 pers_next; - struct list_head io_buffers_cache; - /* deferred free list, protected by ->uring_lock */ - struct hlist_head io_buf_list; - /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; @@ -389,6 +383,9 @@ struct io_ring_ctx { struct wait_queue_head rsrc_quiesce_wq; unsigned rsrc_quiesce; + u32 pers_next; + struct xarray personalities; + /* hashed buffered write serialization */ struct io_wq_hash *hash_map; @@ -405,11 +402,22 @@ struct io_ring_ctx { /* io-wq management, e.g. 
thread count */ u32 iowq_limits[2]; - bool iowq_limits_set; struct callback_head poll_wq_task_work; struct list_head defer_list; - unsigned sq_thread_idle; + +#ifdef CONFIG_NET_RX_BUSY_POLL + struct list_head napi_list; /* track busy poll napi_id */ + spinlock_t napi_lock; /* napi_list lock */ + + /* napi busy poll default timeout */ + unsigned int napi_busy_poll_to; + bool napi_prefer_busy_poll; + bool napi_enabled; + + DECLARE_HASHTABLE(napi_ht, 4); +#endif + /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; @@ -424,8 +432,6 @@ struct io_ring_ctx { }; struct io_tw_state { - /* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */ - bool locked; }; enum { @@ -455,7 +461,6 @@ enum { REQ_F_SKIP_LINK_CQES_BIT, REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, - REQ_F_PARTIAL_IO_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, REQ_F_HASH_LOCKED_BIT, @@ -463,75 +468,91 @@ enum { REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, REQ_F_POLL_NO_LAZY_BIT, + REQ_F_CANCEL_SEQ_BIT, + REQ_F_CAN_POLL_BIT, + REQ_F_BL_EMPTY_BIT, + REQ_F_BL_NO_RECYCLE_BIT, + REQ_F_BUFFERS_COMMIT_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, }; +typedef u64 __bitwise io_req_flags_t; +#define IO_REQ_FLAG(bitno) ((__force io_req_flags_t) BIT_ULL((bitno))) + enum { /* ctx owns file */ - REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + REQ_F_FIXED_FILE = IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT), /* drain existing IO first */ - REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + REQ_F_IO_DRAIN = IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT), /* linked sqes */ - REQ_F_LINK = BIT(REQ_F_LINK_BIT), + REQ_F_LINK = IO_REQ_FLAG(REQ_F_LINK_BIT), /* doesn't sever on completion < 0 */ - REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + REQ_F_HARDLINK = IO_REQ_FLAG(REQ_F_HARDLINK_BIT), /* IOSQE_ASYNC */ - REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + REQ_F_FORCE_ASYNC = IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT), /* IOSQE_BUFFER_SELECT */ - REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + REQ_F_BUFFER_SELECT = IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT), /* IOSQE_CQE_SKIP_SUCCESS */ - REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + REQ_F_CQE_SKIP = IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT), /* fail rest of links */ - REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + REQ_F_FAIL = IO_REQ_FLAG(REQ_F_FAIL_BIT), /* on inflight list, should be cancelled and waited on exit reliably */ - REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + REQ_F_INFLIGHT = IO_REQ_FLAG(REQ_F_INFLIGHT_BIT), /* read/write uses file position */ - REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + REQ_F_CUR_POS = IO_REQ_FLAG(REQ_F_CUR_POS_BIT), /* must not punt to workers */ - REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + REQ_F_NOWAIT = IO_REQ_FLAG(REQ_F_NOWAIT_BIT), /* has or had linked timeout */ - REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + REQ_F_LINK_TIMEOUT = IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT), /* needs cleanup */ - REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT), /* already went through poll handler */ - REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT), /* buffer already selected */ - REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT), /* buffer selected from ring, needs commit */ - REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + REQ_F_BUFFER_RING = IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT), /* caller should reissue async */ - REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + REQ_F_REISSUE = IO_REQ_FLAG(REQ_F_REISSUE_BIT), /* 
supports async reads/writes */ - REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + REQ_F_SUPPORT_NOWAIT = IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT), /* regular file */ - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + REQ_F_ISREG = IO_REQ_FLAG(REQ_F_ISREG_BIT), /* has creds assigned */ - REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + REQ_F_CREDS = IO_REQ_FLAG(REQ_F_CREDS_BIT), /* skip refcounting if not set */ - REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + REQ_F_REFCOUNT = IO_REQ_FLAG(REQ_F_REFCOUNT_BIT), /* there is a linked timeout that has to be armed */ - REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + REQ_F_ARM_LTIMEOUT = IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT), /* ->async_data allocated */ - REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + REQ_F_ASYNC_DATA = IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT), /* don't post CQEs while failing linked requests */ - REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + REQ_F_SKIP_LINK_CQES = IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT), /* single poll may be active */ - REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), /* double poll may active */ - REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), - /* request has already done partial IO */ - REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), /* fast poll multishot mode */ - REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ - REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT), + REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT), /* hashed into ->cancel_hash_locked, protected by ->uring_lock */ - REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT), + REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ - REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT), + REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), + /* cancel sequence is set and valid */ + REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), + /* file is pollable */ + REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), + /* buffer list was empty after selection of buffer */ + REQ_F_BL_EMPTY = IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT), + /* don't recycle provided buffers for this request */ + REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT), + /* buffer ring head needs incrementing on put */ + REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), }; typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); @@ -592,15 +613,17 @@ struct io_kiocb { * and after selection it points to the buffer ID itself. */ u16 buf_index; - unsigned int flags; + + unsigned nr_tw; + + /* REQ_F_* flags */ + io_req_flags_t flags; struct io_cqe cqe; struct io_ring_ctx *ctx; struct task_struct *task; - struct io_rsrc_node *rsrc_node; - union { /* store used ubuf, so we can prevent reloading */ struct io_mapped_ubuf *imu; @@ -621,10 +644,12 @@ struct io_kiocb { /* cache ->apoll->events */ __poll_t apoll_events; }; + + struct io_rsrc_node *rsrc_node; + atomic_t refs; atomic_t poll_refs; struct io_task_work io_task_work; - unsigned nr_tw; /* for polled requests, i.e. 
IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; /* internal polling, see IORING_FEAT_FAST_POLL */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 96dd0acbba44..6fc1c858013d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -293,22 +293,32 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ - u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ - struct bio *io_bio; /* bio being built */ - struct bio io_inline_bio; /* MUST BE LAST! */ + struct bio io_bio; /* MUST BE LAST! */ }; +static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) +{ + return container_of(bio, struct iomap_ioend, io_bio); +} + struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, - loff_t offset); + loff_t offset, unsigned len); /* * Optional, allows the file systems to perform actions just before @@ -329,6 +339,7 @@ struct iomap_writepage_ctx { struct iomap iomap; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; + u32 nr_folios; /* folios added to the ioend */ }; void iomap_finish_ioends(struct iomap_ioend *ioend, int error); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 5e27cb3a3be9..7bc8dff7cf6d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -14,7 +14,6 @@ #include <linux/err.h> #include <linux/of.h> #include <linux/iova_bitmap.h> -#include <uapi/linux/iommu.h> #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) @@ -41,8 +40,109 @@ struct iommu_domain_ops; struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; -struct iommu_fault_event; struct iommu_dma_cookie; +struct iommu_fault_param; + +#define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ +#define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ +#define IOMMU_FAULT_PERM_EXEC (1 << 2) /* exec */ +#define IOMMU_FAULT_PERM_PRIV (1 << 3) /* privileged */ + +/* Generic fault types, can be expanded IRQ remapping fault */ +enum iommu_fault_type { + IOMMU_FAULT_PAGE_REQ = 1, /* page request fault */ +}; + +/** + * struct iommu_fault_page_request - Page Request data + * @flags: encodes whether the corresponding fields are valid and whether this + * is the last page in group (IOMMU_FAULT_PAGE_REQUEST_* values). + * When IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID is set, the page response + * must have the same PASID value as the page request. When it is clear, + * the page response should not have a PASID. 
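A sketch of honoring the NEEDS_PASID rule when a driver builds its reply (hypothetical helper; only the structures and flag, all defined in this header, are assumed):

	static void foo_fill_response(const struct iopf_fault *evt,
				      enum iommu_page_response_code code,
				      struct iommu_page_response *resp)
	{
		bool needs_pasid = evt->fault.prm.flags &
				   IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;

		resp->grpid = evt->fault.prm.grpid;
		resp->code = code;
		/* echo the PASID only when the request demands it */
		resp->pasid = needs_pasid ? evt->fault.prm.pasid : 0;
	}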
+ * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: requested page permissions (IOMMU_FAULT_PERM_* values) + * @addr: page address + * @private_data: device-specific private information + */ +struct iommu_fault_page_request { +#define IOMMU_FAULT_PAGE_REQUEST_PASID_VALID (1 << 0) +#define IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE (1 << 1) +#define IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID (1 << 2) + u32 flags; + u32 pasid; + u32 grpid; + u32 perm; + u64 addr; + u64 private_data[2]; +}; + +/** + * struct iommu_fault - Generic fault data + * @type: fault type from &enum iommu_fault_type + * @prm: Page Request message, when @type is %IOMMU_FAULT_PAGE_REQ + */ +struct iommu_fault { + u32 type; + struct iommu_fault_page_request prm; +}; + +/** + * enum iommu_page_response_code - Return status of fault handlers + * @IOMMU_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is "Success" in PCI PRI. + * @IOMMU_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is "Response Failure" in PCI PRI. + * @IOMMU_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is "Invalid Request" in PCI PRI. + */ +enum iommu_page_response_code { + IOMMU_PAGE_RESP_SUCCESS = 0, + IOMMU_PAGE_RESP_INVALID, + IOMMU_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_page_response - Generic page response information + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @code: response code from &enum iommu_page_response_code + */ +struct iommu_page_response { + u32 pasid; + u32 grpid; + u32 code; +}; + +struct iopf_fault { + struct iommu_fault fault; + /* node for pending lists */ + struct list_head list; +}; + +struct iopf_group { + struct iopf_fault last_fault; + struct list_head faults; + /* list node for iommu_fault_param::faults */ + struct list_head pending_node; + struct work_struct work; + struct iommu_domain *domain; + /* The device's fault data parameter. */ + struct iommu_fault_param *fault_param; +}; + +/** + * struct iopf_queue - IO Page Fault queue + * @wq: the fault workqueue + * @devices: devices attached to this queue + * @lock: protects the device list + */ +struct iopf_queue { + struct workqueue_struct *wq; + struct list_head devices; + struct mutex lock; +}; /* iommu fault flags */ #define IOMMU_FAULT_READ 0x0 @@ -50,7 +150,6 @@ struct iommu_dma_cookie; typedef int (*iommu_fault_handler_t)(struct iommu_domain *, struct device *, unsigned long, int, void *); -typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault *, void *); struct iommu_domain_geometry { dma_addr_t aperture_start; /* First address that can be mapped */ @@ -110,8 +209,7 @@ struct iommu_domain { unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; - enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, - void *data); + int (*iopf_handler)(struct iopf_group *group); void *fault_data; union { struct { @@ -419,6 +517,7 @@ static inline int __iommu_copy_struct_from_user_array( * Upon failure, ERR_PTR must be returned. * @domain_alloc_paging: Allocate an iommu_domain that can be used for * UNMANAGED, DMA, and DMA_FQ domain types. + * @domain_alloc_sva: Allocate an iommu_domain for Shared Virtual Addressing. 
* @probe_device: Add device to iommu driver handling * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU @@ -459,6 +558,8 @@ struct iommu_ops { struct device *dev, u32 flags, struct iommu_domain *parent, const struct iommu_user_data *user_data); struct iommu_domain *(*domain_alloc_paging)(struct device *dev); + struct iommu_domain *(*domain_alloc_sva)(struct device *dev, + struct mm_struct *mm); struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); @@ -468,25 +569,26 @@ struct iommu_ops { /* Request/Free a list of reserved regions for a device */ void (*get_resv_regions)(struct device *dev, struct list_head *list); - int (*of_xlate)(struct device *dev, struct of_phandle_args *args); + int (*of_xlate)(struct device *dev, const struct of_phandle_args *args); bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - int (*page_response)(struct device *dev, - struct iommu_fault_event *evt, - struct iommu_page_response *msg); + void (*page_response)(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); - void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); + void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; struct module *owner; struct iommu_domain *identity_domain; struct iommu_domain *blocked_domain; + struct iommu_domain *release_domain; struct iommu_domain *default_domain; }; @@ -578,38 +680,34 @@ struct iommu_device { }; /** - * struct iommu_fault_event - Generic fault event - * - * Can represent recoverable faults such as a page requests or - * unrecoverable faults such as DMA or IRQ remapping faults. - * - * @fault: fault descriptor - * @list: pending fault event list, used for tracking responses - */ -struct iommu_fault_event { - struct iommu_fault fault; - struct list_head list; -}; - -/** * struct iommu_fault_param - per-device IOMMU fault data - * @handler: Callback function to handle IOMMU faults at device level - * @data: handler private data - * @faults: holds the pending faults which needs response * @lock: protect pending faults list + * @users: user counter to manage the lifetime of the data + * @rcu: rcu head for kfree_rcu() + * @dev: the device that owns this param + * @queue: IOPF queue + * @queue_list: index into queue->devices + * @partial: faults that are part of a Page Request Group for which the last + * request hasn't been submitted yet. 
+ * @faults: holds the pending faults which need response */ struct iommu_fault_param { - iommu_dev_fault_handler_t handler; - void *data; - struct list_head faults; struct mutex lock; + refcount_t users; + struct rcu_head rcu; + + struct device *dev; + struct iopf_queue *queue; + struct list_head queue_list; + + struct list_head partial; + struct list_head faults; }; /** * struct dev_iommu - Collection of per-device IOMMU data * * @fault_param: IOMMU detected device fault reporting data - * @iopf_param: I/O Page Fault queue and data * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data @@ -624,8 +722,7 @@ struct iommu_fault_param { */ struct dev_iommu { struct mutex lock; - struct iommu_fault_param *fault_param; - struct iopf_device_param *iopf_param; + struct iommu_fault_param __rcu *fault_param; struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; @@ -654,6 +751,22 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +/** + * iommu_get_iommu_dev - Get iommu_device for a device + * @dev: an end-point device + * + * Note that this function must be called from an iommu_ops callback + * to retrieve the iommu_device for a device; the core code + * guarantees it will not invoke the op without an attached iommu. + */ +static inline struct iommu_device *__iommu_get_iommu_dev(struct device *dev) +{ + return dev->iommu->iommu_dev; +} + +#define iommu_get_iommu_dev(dev, type, member) \ + container_of(__iommu_get_iommu_dev(dev), type, member) + static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { @@ -719,16 +832,6 @@ extern int iommu_group_for_each_dev(struct iommu_group *group, void *data, extern struct iommu_group *iommu_group_get(struct device *dev); extern struct iommu_group *iommu_group_ref_get(struct iommu_group *group); extern void iommu_group_put(struct iommu_group *group); -extern int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data); - -extern int iommu_unregister_device_fault_handler(struct device *dev); - -extern int iommu_report_device_fault(struct device *dev, - struct iommu_fault_event *evt); -extern int iommu_page_response(struct device *dev, - struct iommu_page_response *msg); extern int iommu_group_id(struct iommu_group *group); extern struct iommu_domain *iommu_group_default_domain(struct iommu_group *); @@ -905,8 +1008,8 @@ struct iommu_mm_data { int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, const struct iommu_ops *ops); void iommu_fwspec_free(struct device *dev); -int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -948,8 +1051,6 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group); int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); void iommu_detach_device_pasid(struct
iommu_domain *domain, @@ -1138,31 +1239,6 @@ static inline void iommu_group_put(struct iommu_group *group) { } -static inline -int iommu_register_device_fault_handler(struct device *dev, - iommu_dev_fault_handler_t handler, - void *data) -{ - return -ENODEV; -} - -static inline int iommu_unregister_device_fault_handler(struct device *dev) -{ - return 0; -} - -static inline -int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) -{ - return -ENODEV; -} - -static inline int iommu_page_response(struct device *dev, - struct iommu_page_response *msg) -{ - return -ENODEV; -} - static inline int iommu_group_id(struct iommu_group *group) { return -ENODEV; @@ -1256,7 +1332,7 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, } static inline -const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) { return NULL; } @@ -1311,12 +1387,6 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) return -ENODEV; } -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} - static inline int iommu_attach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { @@ -1343,6 +1413,14 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) +void iommu_group_mutex_assert(struct device *dev); +#else +static inline void iommu_group_mutex_assert(struct device *dev) +{ +} +#endif + /** * iommu_map_sgtable - Map the given buffer to the IOMMU domain * @domain: The IOMMU domain to perform the mapping @@ -1370,9 +1448,6 @@ static inline void iommu_debugfs_setup(void) {} #ifdef CONFIG_IOMMU_DMA #include <linux/msi.h> -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); - int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); @@ -1383,10 +1458,6 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); struct msi_desc; struct msi_msg; -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) -{ -} - static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { return -ENODEV; @@ -1456,6 +1527,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1480,6 +1553,68 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} #endif /* CONFIG_IOMMU_SVA */ +#ifdef CONFIG_IOMMU_IOPF +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev); +void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev); +int iopf_queue_flush_dev(struct device *dev); +struct iopf_queue *iopf_queue_alloc(const char *name); +void 
iopf_queue_free(struct iopf_queue *queue); +int iopf_queue_discard_partial(struct iopf_queue *queue); +void iopf_free_group(struct iopf_group *group); +void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt); +void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status); +#else +static inline int +iopf_queue_add_device(struct iopf_queue *queue, struct device *dev) +{ + return -ENODEV; +} + +static inline void +iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) +{ +} + +static inline int iopf_queue_flush_dev(struct device *dev) +{ + return -ENODEV; +} + +static inline struct iopf_queue *iopf_queue_alloc(const char *name) +{ + return NULL; +} + +static inline void iopf_queue_free(struct iopf_queue *queue) +{ +} + +static inline int iopf_queue_discard_partial(struct iopf_queue *queue) +{ + return -ENODEV; +} + +static inline void iopf_free_group(struct iopf_group *group) +{ +} + +static inline void +iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) +{ +} + +static inline void iopf_group_response(struct iopf_group *group, + enum iommu_page_response_code status) +{ +} +#endif /* CONFIG_IOMMU_IOPF */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index e3649a6563dd..4696abfd311c 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -34,7 +34,7 @@ * the same driver for allocation, read and write operations. * * Open-coding access to :c:type:`struct iosys_map <iosys_map>` is considered - * bad style. Rather then accessing its fields directly, use one of the provided + * bad style. Rather than accessing its fields directly, use one of the provided * helper functions, or implement your own. For example, instances of * :c:type:`struct iosys_map <iosys_map>` can be initialized statically with * IOSYS_MAP_INIT_VADDR(), or at runtime with iosys_map_set_vaddr(). These diff --git a/include/linux/iova.h b/include/linux/iova.h index 83c00fac2acb..d2c4fd923efa 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -65,6 +65,11 @@ static inline size_t iova_align(struct iova_domain *iovad, size_t size) return ALIGN(size, iovad->granule); } +static inline size_t iova_align_down(struct iova_domain *iovad, size_t size) +{ + return ALIGN_DOWN(size, iovad->granule); +} + static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) { return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 5e605e384aac..383a0ea2ab91 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -3,6 +3,7 @@ #define _IPV6_H #include <uapi/linux/ipv6.h> +#include <linux/cache.h> #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) #define ipv6_authlen(p) (((p)->hdrlen+2) << 2) @@ -10,9 +11,16 @@ * This structure contains configuration options per IPv6 link. */ struct ipv6_devconf { - __s32 forwarding; + /* RX & TX fastpath fields. 
*/ + __cacheline_group_begin(ipv6_devconf_read_txrx); + __s32 disable_ipv6; __s32 hop_limit; __s32 mtu6; + __s32 forwarding; + __s32 disable_policy; + __s32 proxy_ndp; + __cacheline_group_end(ipv6_devconf_read_txrx); + __s32 accept_ra; __s32 accept_redirects; __s32 autoconf; @@ -27,6 +35,7 @@ struct ipv6_devconf { __s32 use_tempaddr; __s32 temp_valid_lft; __s32 temp_prefered_lft; + __s32 regen_min_advance; __s32 regen_max_retry; __s32 max_desync_factor; __s32 max_addresses; @@ -44,7 +53,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; __s32 accept_source_route; __s32 accept_ra_from_local; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -54,7 +62,6 @@ struct ipv6_devconf { #ifdef CONFIG_IPV6_MROUTE atomic_t mc_forwarding; #endif - __s32 disable_ipv6; __s32 drop_unicast_in_l2_multicast; __s32 accept_dad; __s32 force_tllao; @@ -75,7 +82,6 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; - __s32 disable_policy; __s32 ndisc_tclass; __s32 rpl_seg_enabled; __u32 ioam6_id; diff --git a/include/linux/irq.h b/include/linux/irq.h index 90081afa10ce..a217e1029c1d 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -115,7 +115,7 @@ enum { * Return value for chip->irq_set_affinity() * * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity - * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity + * IRQ_SET_MASK_NOCOPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to * support stacked irqchips, which indicates skipping * all descendant irqchips. @@ -179,7 +179,7 @@ struct irq_common_data { struct irq_data { u32 mask; unsigned int irq; - unsigned long hwirq; + irq_hw_number_t hwirq; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; diff --git a/include/linux/irqchip/riscv-aplic.h b/include/linux/irqchip/riscv-aplic.h new file mode 100644 index 000000000000..ec8f7df50583 --- /dev/null +++ b/include/linux/irqchip/riscv-aplic.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * Copyright (C) 2022 Ventana Micro Systems Inc. 
+ */ +#ifndef __LINUX_IRQCHIP_RISCV_APLIC_H +#define __LINUX_IRQCHIP_RISCV_APLIC_H + +#include <linux/bitops.h> + +#define APLIC_MAX_IDC BIT(14) +#define APLIC_MAX_SOURCE 1024 + +#define APLIC_DOMAINCFG 0x0000 +#define APLIC_DOMAINCFG_RDONLY 0x80000000 +#define APLIC_DOMAINCFG_IE BIT(8) +#define APLIC_DOMAINCFG_DM BIT(2) +#define APLIC_DOMAINCFG_BE BIT(0) + +#define APLIC_SOURCECFG_BASE 0x0004 +#define APLIC_SOURCECFG_D BIT(10) +#define APLIC_SOURCECFG_CHILDIDX_MASK 0x000003ff +#define APLIC_SOURCECFG_SM_MASK 0x00000007 +#define APLIC_SOURCECFG_SM_INACTIVE 0x0 +#define APLIC_SOURCECFG_SM_DETACH 0x1 +#define APLIC_SOURCECFG_SM_EDGE_RISE 0x4 +#define APLIC_SOURCECFG_SM_EDGE_FALL 0x5 +#define APLIC_SOURCECFG_SM_LEVEL_HIGH 0x6 +#define APLIC_SOURCECFG_SM_LEVEL_LOW 0x7 + +#define APLIC_MMSICFGADDR 0x1bc0 +#define APLIC_MMSICFGADDRH 0x1bc4 +#define APLIC_SMSICFGADDR 0x1bc8 +#define APLIC_SMSICFGADDRH 0x1bcc + +#ifdef CONFIG_RISCV_M_MODE +#define APLIC_xMSICFGADDR APLIC_MMSICFGADDR +#define APLIC_xMSICFGADDRH APLIC_MMSICFGADDRH +#else +#define APLIC_xMSICFGADDR APLIC_SMSICFGADDR +#define APLIC_xMSICFGADDRH APLIC_SMSICFGADDRH +#endif + +#define APLIC_xMSICFGADDRH_L BIT(31) +#define APLIC_xMSICFGADDRH_HHXS_MASK 0x1f +#define APLIC_xMSICFGADDRH_HHXS_SHIFT 24 +#define APLIC_xMSICFGADDRH_HHXS (APLIC_xMSICFGADDRH_HHXS_MASK << \ + APLIC_xMSICFGADDRH_HHXS_SHIFT) +#define APLIC_xMSICFGADDRH_LHXS_MASK 0x7 +#define APLIC_xMSICFGADDRH_LHXS_SHIFT 20 +#define APLIC_xMSICFGADDRH_LHXS (APLIC_xMSICFGADDRH_LHXS_MASK << \ + APLIC_xMSICFGADDRH_LHXS_SHIFT) +#define APLIC_xMSICFGADDRH_HHXW_MASK 0x7 +#define APLIC_xMSICFGADDRH_HHXW_SHIFT 16 +#define APLIC_xMSICFGADDRH_HHXW (APLIC_xMSICFGADDRH_HHXW_MASK << \ + APLIC_xMSICFGADDRH_HHXW_SHIFT) +#define APLIC_xMSICFGADDRH_LHXW_MASK 0xf +#define APLIC_xMSICFGADDRH_LHXW_SHIFT 12 +#define APLIC_xMSICFGADDRH_LHXW (APLIC_xMSICFGADDRH_LHXW_MASK << \ + APLIC_xMSICFGADDRH_LHXW_SHIFT) +#define APLIC_xMSICFGADDRH_BAPPN_MASK 0xfff +#define APLIC_xMSICFGADDRH_BAPPN_SHIFT 0 +#define APLIC_xMSICFGADDRH_BAPPN (APLIC_xMSICFGADDRH_BAPPN_MASK << \ + APLIC_xMSICFGADDRH_BAPPN_SHIFT) + +#define APLIC_xMSICFGADDR_PPN_SHIFT 12 + +#define APLIC_xMSICFGADDR_PPN_HART(__lhxs) \ + (BIT(__lhxs) - 1) + +#define APLIC_xMSICFGADDR_PPN_LHX_MASK(__lhxw) \ + (BIT(__lhxw) - 1) +#define APLIC_xMSICFGADDR_PPN_LHX_SHIFT(__lhxs) \ + ((__lhxs)) +#define APLIC_xMSICFGADDR_PPN_LHX(__lhxw, __lhxs) \ + (APLIC_xMSICFGADDR_PPN_LHX_MASK(__lhxw) << \ + APLIC_xMSICFGADDR_PPN_LHX_SHIFT(__lhxs)) + +#define APLIC_xMSICFGADDR_PPN_HHX_MASK(__hhxw) \ + (BIT(__hhxw) - 1) +#define APLIC_xMSICFGADDR_PPN_HHX_SHIFT(__hhxs) \ + ((__hhxs) + APLIC_xMSICFGADDR_PPN_SHIFT) +#define APLIC_xMSICFGADDR_PPN_HHX(__hhxw, __hhxs) \ + (APLIC_xMSICFGADDR_PPN_HHX_MASK(__hhxw) << \ + APLIC_xMSICFGADDR_PPN_HHX_SHIFT(__hhxs)) + +#define APLIC_IRQBITS_PER_REG 32 + +#define APLIC_SETIP_BASE 0x1c00 +#define APLIC_SETIPNUM 0x1cdc + +#define APLIC_CLRIP_BASE 0x1d00 +#define APLIC_CLRIPNUM 0x1ddc + +#define APLIC_SETIE_BASE 0x1e00 +#define APLIC_SETIENUM 0x1edc + +#define APLIC_CLRIE_BASE 0x1f00 +#define APLIC_CLRIENUM 0x1fdc + +#define APLIC_SETIPNUM_LE 0x2000 +#define APLIC_SETIPNUM_BE 0x2004 + +#define APLIC_GENMSI 0x3000 + +#define APLIC_TARGET_BASE 0x3004 +#define APLIC_TARGET_HART_IDX_SHIFT 18 +#define APLIC_TARGET_HART_IDX_MASK 0x3fff +#define APLIC_TARGET_HART_IDX (APLIC_TARGET_HART_IDX_MASK << \ + APLIC_TARGET_HART_IDX_SHIFT) +#define APLIC_TARGET_GUEST_IDX_SHIFT 12 +#define APLIC_TARGET_GUEST_IDX_MASK 0x3f +#define APLIC_TARGET_GUEST_IDX 
(APLIC_TARGET_GUEST_IDX_MASK << \ + APLIC_TARGET_GUEST_IDX_SHIFT) +#define APLIC_TARGET_IPRIO_SHIFT 0 +#define APLIC_TARGET_IPRIO_MASK 0xff +#define APLIC_TARGET_IPRIO (APLIC_TARGET_IPRIO_MASK << \ + APLIC_TARGET_IPRIO_SHIFT) +#define APLIC_TARGET_EIID_SHIFT 0 +#define APLIC_TARGET_EIID_MASK 0x7ff +#define APLIC_TARGET_EIID (APLIC_TARGET_EIID_MASK << \ + APLIC_TARGET_EIID_SHIFT) + +#define APLIC_IDC_BASE 0x4000 +#define APLIC_IDC_SIZE 32 + +#define APLIC_IDC_IDELIVERY 0x00 + +#define APLIC_IDC_IFORCE 0x04 + +#define APLIC_IDC_ITHRESHOLD 0x08 + +#define APLIC_IDC_TOPI 0x18 +#define APLIC_IDC_TOPI_ID_SHIFT 16 +#define APLIC_IDC_TOPI_ID_MASK 0x3ff +#define APLIC_IDC_TOPI_ID (APLIC_IDC_TOPI_ID_MASK << \ + APLIC_IDC_TOPI_ID_SHIFT) +#define APLIC_IDC_TOPI_PRIO_SHIFT 0 +#define APLIC_IDC_TOPI_PRIO_MASK 0xff +#define APLIC_IDC_TOPI_PRIO (APLIC_IDC_TOPI_PRIO_MASK << \ + APLIC_IDC_TOPI_PRIO_SHIFT) + +#define APLIC_IDC_CLAIMI 0x1c + +#endif diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h new file mode 100644 index 000000000000..faf0b800b1b0 --- /dev/null +++ b/include/linux/irqchip/riscv-imsic.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + * Copyright (C) 2022 Ventana Micro Systems Inc. + */ +#ifndef __LINUX_IRQCHIP_RISCV_IMSIC_H +#define __LINUX_IRQCHIP_RISCV_IMSIC_H + +#include <linux/types.h> +#include <linux/bitops.h> +#include <asm/csr.h> + +#define IMSIC_MMIO_PAGE_SHIFT 12 +#define IMSIC_MMIO_PAGE_SZ BIT(IMSIC_MMIO_PAGE_SHIFT) +#define IMSIC_MMIO_PAGE_LE 0x00 +#define IMSIC_MMIO_PAGE_BE 0x04 + +#define IMSIC_MIN_ID 63 +#define IMSIC_MAX_ID 2048 + +#define IMSIC_EIDELIVERY 0x70 + +#define IMSIC_EITHRESHOLD 0x72 + +#define IMSIC_EIP0 0x80 +#define IMSIC_EIP63 0xbf +#define IMSIC_EIPx_BITS 32 + +#define IMSIC_EIE0 0xc0 +#define IMSIC_EIE63 0xff +#define IMSIC_EIEx_BITS 32 + +#define IMSIC_FIRST IMSIC_EIDELIVERY +#define IMSIC_LAST IMSIC_EIE63 + +#define IMSIC_MMIO_SETIPNUM_LE 0x00 +#define IMSIC_MMIO_SETIPNUM_BE 0x04 + +struct imsic_local_config { + phys_addr_t msi_pa; + void __iomem *msi_va; +}; + +struct imsic_global_config { + /* + * MSI Target Address Scheme + * + * XLEN-1 12 0 + * | | | + * ------------------------------------------------------------- + * |xxxxxx|Group Index|xxxxxxxxxxx|HART Index|Guest Index| 0 | + * ------------------------------------------------------------- + */ + + /* Bits representing Guest index, HART index, and Group index */ + u32 guest_index_bits; + u32 hart_index_bits; + u32 group_index_bits; + u32 group_index_shift; + + /* Global base address matching all target MSI addresses */ + phys_addr_t base_addr; + + /* Number of interrupt identities */ + u32 nr_ids; + + /* Number of guest interrupt identities */ + u32 nr_guest_ids; + + /* Per-CPU IMSIC addresses */ + struct imsic_local_config __percpu *local; +}; + +#ifdef CONFIG_RISCV_IMSIC + +const struct imsic_global_config *imsic_get_global_config(void); + +#else + +static inline const struct imsic_global_config *imsic_get_global_config(void) +{ + return NULL; +} + +#endif + +#endif diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index d9451d456a73..fd091c35d572 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -18,6 +18,18 @@ struct irq_domain; struct pt_regs; /** + * struct irqstat - interrupt statistics + * @cnt: real-time interrupt count + * @ref: snapshot of interrupt count + */ +struct irqstat { + unsigned int cnt; +#ifdef 
CONFIG_GENERIC_IRQ_STAT_SNAPSHOT + unsigned int ref; +#endif +}; + +/** * struct irq_desc - interrupt descriptor * @irq_common_data: per irq and chip data passed down to chip functions * @kstat_irqs: irq stats per cpu @@ -55,7 +67,7 @@ struct pt_regs; struct irq_desc { struct irq_common_data irq_common_data; struct irq_data irq_data; - unsigned int __percpu *kstat_irqs; + struct irqstat __percpu *kstat_irqs; irq_flow_handler_t handle_irq; struct irqaction *action; /* IRQ action list */ unsigned int status_use_accessors; @@ -119,7 +131,7 @@ extern struct irq_desc irq_desc[NR_IRQS]; static inline unsigned int irq_desc_kstat_cpu(struct irq_desc *desc, unsigned int cpu) { - return desc->kstat_irqs ? *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; + return desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, cpu) : 0; } static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ee0a82c60508..21ecf582a0fe 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -619,6 +619,23 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ +#ifdef CONFIG_GENERIC_MSI_IRQ +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type); +void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); +#else +static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, + unsigned int type) +{ + WARN_ON_ONCE(1); + return -EINVAL; +} +static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) +{ + WARN_ON_ONCE(1); +} +#endif + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index c29921fd8cd1..36653e2ee1c9 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -25,7 +25,8 @@ enum irq_domain_bus_token { DOMAIN_BUS_PCI_DEVICE_MSIX, DOMAIN_BUS_DMAR, DOMAIN_BUS_AMDVI, - DOMAIN_BUS_PCI_DEVICE_IMS, + DOMAIN_BUS_DEVICE_MSI, + DOMAIN_BUS_WIRED_TO_MSI, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 147feebd508c..3f003d5fde53 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -114,7 +114,7 @@ do { \ # define lockdep_softirq_enter() do { } while (0) # define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false -# define lockdep_hrtimer_exit(__context) do { } while (0) +# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0) # define lockdep_posixtimer_enter() do { } while (0) # define lockdep_posixtimer_exit() do { } while (0) # define lockdep_irq_work_enter(__work) do { } while (0) diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index c30f454a9518..72dd1eb3a0e7 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ */ struct irq_desc; -struct irq_data; + typedef void (*irq_flow_handler_t)(struct irq_desc *desc); #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 971f3e826e15..ab04c1c27fae 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1434,7 +1434,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management 
*/ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); +enum jbd2_shrink_type {JBD2_SHRINK_DESTROY, JBD2_SHRINK_BUSY_STOP, JBD2_SHRINK_BUSY_SKIP}; + +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, enum jbd2_shrink_type type); unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan); int __jbd2_journal_remove_checkpoint(struct journal_head *); int jbd2_journal_try_remove_checkpoint(struct journal_head *jh); @@ -1586,10 +1588,8 @@ void jbd2_journal_put_journal_head(struct journal_head *jh); */ extern struct kmem_cache *jbd2_handle_cache; -static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags) -{ - return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags); -} +#define jbd2_alloc_handle(_gfp_flags) \ + ((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags)) static inline void jbd2_free_handle(handle_t *handle) { @@ -1602,10 +1602,8 @@ static inline void jbd2_free_handle(handle_t *handle) */ extern struct kmem_cache *jbd2_inode_cache; -static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags) -{ - return kmem_cache_alloc(jbd2_inode_cache, gfp_flags); -} +#define jbd2_alloc_inode(_gfp_flags) \ + ((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags)) static inline void jbd2_free_inode(struct jbd2_inode *jinode) { @@ -1696,7 +1694,7 @@ static inline void jbd2_journal_abort_handle(handle_t *handle) static inline void jbd2_init_fs_dev_write_error(journal_t *journal) { - struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + struct address_space *mapping = journal->j_fs_dev->bd_mapping; /* * Save the original wb_err value of client fs's bdev mapping which @@ -1707,7 +1705,7 @@ static inline void jbd2_init_fs_dev_write_error(journal_t *journal) static inline int jbd2_check_fs_dev_write_error(journal_t *journal) { - struct address_space *mapping = journal->j_fs_dev->bd_inode->i_mapping; + struct address_space *mapping = journal->j_fs_dev->bd_mapping; return errseq_check(&mapping->wb_err, READ_ONCE(journal->j_fs_dev_wb_err)); diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index e0ae2a43e0eb..d9f1435a5a13 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -102,12 +102,15 @@ static inline u64 get_jiffies_64(void) } #endif -/* - * These inlines deal with timer wrapping correctly. You are - * strongly encouraged to use them: - * 1. Because people otherwise forget - * 2. Because if the timer wrap changes in future you won't have to - * alter your driver code. +/** + * DOC: General information about time_* inlines + * + * These inlines deal with timer wrapping correctly. You are strongly encouraged + * to use them: + * + * #. Because people otherwise forget + * #. Because if the timer wrap changes in future you won't have to alter your + * driver code. 
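A small illustration of the point, using the existing time_after() helper; the two-second timeout and handle_timeout() are arbitrary:

unsigned long timeout = jiffies + 2 * HZ;	/* two seconds from now */

/* correct even if jiffies wraps between the two reads */
if (time_after(jiffies, timeout))
	handle_timeout();

/* open-coding "jiffies > timeout" would misfire across the wrap */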
*/ /** diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f0a949b7c973..f5a2727ca4a9 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -216,6 +216,7 @@ extern struct jump_entry __start___jump_table[]; extern struct jump_entry __stop___jump_table[]; extern void jump_label_init(void); +extern void jump_label_init_ro(void); extern void jump_label_lock(void); extern void jump_label_unlock(void); extern void arch_jump_label_transform(struct jump_entry *entry, @@ -265,6 +266,8 @@ static __always_inline void jump_label_init(void) static_key_initialized = true; } +static __always_inline void jump_label_init_ro(void) { } + static __always_inline bool static_key_false(struct static_key *key) { if (unlikely_notrace(static_key_count(key) > 0)) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dbb06d789e74..70d6a8f6e25d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -429,7 +429,6 @@ struct kasan_cache { }; size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); -slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); @@ -446,11 +445,6 @@ static inline size_t kasan_metadata_size(struct kmem_cache *cache, { return 0; } -/* And thus nothing prevents cache merging. */ -static inline slab_flags_t kasan_never_merge(void) -{ - return 0; -} /* And no cache-related metadata initialization is required. */ static inline void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d9ad21058eed..be2e8c0a187e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,20 +33,14 @@ #include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> +#include <linux/wordpart.h> + #include <asm/byteorder.h> #include <uapi/linux/kernel.h> #define STACK_MAGIC 0xdeadbeef -/** - * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value - * @x: value to repeat - * - * NOTE: @x is not checked for > 0xff; larger values produce odd results. - */ -#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) - /* generic data direction definitions */ #define READ 0 #define WRITE 1 @@ -60,34 +54,6 @@ } \ ) -/** - * upper_32_bits - return bits 32-63 of a number - * @n: the number we're accessing - * - * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress - * the "right shift count >= width of type" warning when that quantity is - * 32-bits. 
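The helpers keep their semantics at their new home in <linux/wordpart.h> (now pulled in by kernel.h above); with an arbitrary value:

u64 v = 0x123456789abcdef0ULL;

upper_32_bits(v);			/* 0x12345678 */
lower_32_bits(v);			/* 0x9abcdef0 */
upper_16_bits(lower_32_bits(v));	/* 0x9abc */
lower_16_bits(v);			/* 0xdef0 */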
- */ -#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) - -/** - * lower_32_bits - return bits 0-31 of a number - * @n: the number we're accessing - */ -#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) - -/** - * upper_16_bits - return bits 16-31 of a number - * @n: the number we're accessing - */ -#define upper_16_bits(n) ((u16)((n) >> 16)) - -/** - * lower_16_bits - return bits 0-15 of a number - * @n: the number we're accessing - */ -#define lower_16_bits(n) ((u16)((n) & 0xffff)) - struct completion; struct user; @@ -199,12 +165,6 @@ static inline void might_fault(void) { } void do_exit(long error_code) __noreturn; -extern int get_option(char **str, int *pint); -extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(const char *ptr, char **retptr); -extern bool parse_option_str(const char *str, const char *option); -extern char *next_arg(char *args, char **param, char **val); - extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); @@ -255,6 +215,7 @@ enum ftrace_dump_mode { DUMP_NONE, DUMP_ALL, DUMP_ORIG, + DUMP_PARAM, }; #ifdef CONFIG_TRACING diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 9935f7ecbfb9..9c042c6384bb 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,6 +79,14 @@ static inline unsigned int kstat_cpu_softirqs_sum(int cpu) return sum; } +#ifdef CONFIG_GENERIC_IRQ_STAT_SNAPSHOT +extern void kstat_snapshot_irqs(void); +extern unsigned int kstat_get_irq_since_snapshot(unsigned int irq); +#else +static inline void kstat_snapshot_irqs(void) { } +static inline unsigned int kstat_get_irq_since_snapshot(unsigned int irq) { return 0; } +#endif + /* * Number of interrupts per specific IRQ source, since bootup */ diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 99aaa050ccb7..87c79d076d6d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -206,23 +206,25 @@ struct kernfs_node { const void *ns; /* namespace tag */ unsigned int hash; /* ns + name hash */ + unsigned short flags; + umode_t mode; + union { struct kernfs_elem_dir dir; struct kernfs_elem_symlink symlink; struct kernfs_elem_attr attr; }; - void *priv; - /* * 64bit unique ID. On 64bit ino setups, id is the ino. On 32bit, * the low 32bits are ino and upper generation. */ u64 id; - unsigned short flags; - umode_t mode; + void *priv; struct kernfs_iattrs *iattr; + + struct rcu_head rcu; }; /* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 400cb6c02176..f0e9f8eda7a3 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -15,7 +15,8 @@ #if !defined(__ASSEMBLY__) -#include <linux/crash_core.h> +#include <linux/vmcore_info.h> +#include <linux/crash_reserve.h> #include <asm/io.h> #include <linux/range.h> @@ -31,6 +32,7 @@ extern note_buf_t __percpu *crash_notes; #include <linux/module.h> #include <linux/highmem.h> #include <asm/kexec.h> +#include <linux/crash_core.h> /* Verify architecture specific macros are defined */ @@ -317,8 +319,10 @@ struct kimage { /* If set, we are using file mode kexec syscall */ unsigned int file_mode:1; #ifdef CONFIG_CRASH_HOTPLUG - /* If set, allow changes to elfcorehdr of kexec_load'd image */ - unsigned int update_elfcorehdr:1; + /* If set, it is safe to update kexec segments that are + * excluded from SHA calculation. 
+ */ + unsigned int hotplug_support:1; #endif #ifdef ARCH_HAS_KIMAGE_ARCH @@ -378,13 +382,6 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image, static inline int machine_kexec_post_load(struct kimage *image) { return 0; } #endif -extern void __crash_kexec(struct pt_regs *); -extern void crash_kexec(struct pt_regs *); -int kexec_should_crash(struct task_struct *); -int kexec_crash_loaded(void); -void crash_save_cpu(struct pt_regs *regs, int cpu); -extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); - extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; @@ -396,9 +393,10 @@ bool kexec_load_permitted(int kexec_image_type); /* List of defined/legal kexec flags */ #ifndef CONFIG_KEXEC_JUMP -#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_UPDATE_ELFCOREHDR) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_UPDATE_ELFCOREHDR | KEXEC_CRASH_HOTPLUG_SUPPORT) #else -#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_UPDATE_ELFCOREHDR) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_UPDATE_ELFCOREHDR | \ + KEXEC_CRASH_HOTPLUG_SUPPORT) #endif /* List of defined/legal kexec file flags */ @@ -408,24 +406,6 @@ bool kexec_load_permitted(int kexec_image_type); /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -int crash_shrink_memory(unsigned long new_size); -ssize_t crash_get_memory_size(void); - -#ifndef arch_kexec_protect_crashkres -/* - * Protection mechanism for crashkernel reserved memory after - * the kdump kernel is loaded. - * - * Provide an empty default implementation here -- architecture - * code may override this - */ -static inline void arch_kexec_protect_crashkres(void) { } -#endif - -#ifndef arch_kexec_unprotect_crashkres -static inline void arch_kexec_unprotect_crashkres(void) { } -#endif - #ifndef page_to_boot_pfn static inline unsigned long page_to_boot_pfn(struct page *page) { @@ -482,30 +462,10 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { } #endif -#ifndef arch_crash_handle_hotplug_event -static inline void arch_crash_handle_hotplug_event(struct kimage *image) { } -#endif - -int crash_check_update_elfcorehdr(void); - -#ifndef crash_hotplug_cpu_support -static inline int crash_hotplug_cpu_support(void) { return 0; } -#endif - -#ifndef crash_hotplug_memory_support -static inline int crash_hotplug_memory_support(void) { return 0; } -#endif - -#ifndef crash_get_elfcorehdr_size -static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; } -#endif - extern bool kexec_file_dbg_print; -#define kexec_dprintk(fmt, ...) \ - printk("%s" fmt, \ - kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \ - ##__VA_ARGS__) +#define kexec_dprintk(fmt, arg...) \ + do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0) #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 0b35a41440ff..564868bdce89 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -36,10 +36,16 @@ * to lock the reader. 
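A sketch of the zero-copy pattern that the kfifo_out_linear_ptr()/kfifo_skip_count() helpers added below enable, assuming a single reader; byte_fifo and process_bytes() are hypothetical:

static DEFINE_KFIFO(byte_fifo, unsigned char, 256);

static void drain_linearly(void)
{
	unsigned char *buf;
	unsigned int n;

	/* map the longest contiguous run of buffered data */
	n = kfifo_out_linear_ptr(&byte_fifo, &buf, 64);
	process_bytes(buf, n);
	/* consume it without an intermediate copy */
	kfifo_skip_count(&byte_fifo, n);
}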
*/ -#include <linux/kernel.h> +#include <linux/array_size.h> +#include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/stddef.h> -#include <linux/scatterlist.h> +#include <linux/types.h> + +#include <asm/barrier.h> +#include <asm/errno.h> + +struct scatterlist; struct __kfifo { unsigned int in; @@ -304,19 +310,25 @@ __kfifo_uint_must_check_helper( \ ) /** - * kfifo_skip - skip output data + * kfifo_skip_count - skip output data * @fifo: address of the fifo to be used + * @count: count of data to skip */ -#define kfifo_skip(fifo) \ -(void)({ \ +#define kfifo_skip_count(fifo, count) do { \ typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ __kfifo_skip_r(__kfifo, __recsize); \ else \ - __kfifo->out++; \ -}) + __kfifo->out += (count); \ +} while (0) + +/** + * kfifo_skip - skip output data + * @fifo: address of the fifo to be used + */ +#define kfifo_skip(fifo) kfifo_skip_count(fifo, 1) /** * kfifo_peek_len - gets the size of the next fifo record @@ -578,7 +590,7 @@ __kfifo_uint_must_check_helper( \ * @buf: pointer to the storage buffer * @n: max. number of elements to get * - * This macro get some data from the fifo and return the numbers of elements + * This macro gets some data from the fifo and returns the number of elements * copied. * * Note that with only one concurrent reader and one concurrent @@ -605,7 +617,7 @@ __kfifo_uint_must_check_helper( \ * @n: max. number of elements to get * @lock: pointer to the spinlock to use for locking * - * This macro get the data from the fifo and return the numbers of elements + * This macro gets the data from the fifo and returns the number of elements * copied. */ #define kfifo_out_spinlocked(fifo, buf, n, lock) \ @@ -703,11 +715,12 @@ __kfifo_int_must_check_helper( \ ) /** - * kfifo_dma_in_prepare - setup a scatterlist for DMA input + * kfifo_dma_in_prepare_mapped - setup a scatterlist for DMA input * @fifo: address of the fifo to be used * @sgl: pointer to the scatterlist array * @nents: number of entries in the scatterlist array * @len: number of elements to transfer + * @dma: mapped dma address to fill into @sgl * * This macro fills a scatterlist for DMA input. * It returns the number entries in the scatterlist array. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these macros. */ -#define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ +#define kfifo_dma_in_prepare_mapped(fifo, sgl, nents, len, dma) \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ @@ -724,16 +737,20 @@ __kfifo_int_must_check_helper( \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (__recsize) ? \ - __kfifo_dma_in_prepare_r(__kfifo, __sgl, __nents, __len, __recsize) : \ - __kfifo_dma_in_prepare(__kfifo, __sgl, __nents, __len); \ + __kfifo_dma_in_prepare_r(__kfifo, __sgl, __nents, __len, __recsize, \ + dma) : \ + __kfifo_dma_in_prepare(__kfifo, __sgl, __nents, __len, dma); \ }) +#define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ + kfifo_dma_in_prepare_mapped(fifo, sgl, nents, len, DMA_MAPPING_ERROR) + /** * kfifo_dma_in_finish - finish a DMA IN operation * @fifo: address of the fifo to be used * @len: number of bytes to received * - * This macro finish a DMA IN operation. The in counter will be updated by + * This macro finishes a DMA IN operation.
The in counter will be updated by * the len parameter. No error checking will be done. * * Note that with only one concurrent reader and one concurrent @@ -752,11 +769,12 @@ __kfifo_int_must_check_helper( \ }) /** - * kfifo_dma_out_prepare - setup a scatterlist for DMA output + * kfifo_dma_out_prepare_mapped - setup a scatterlist for DMA output * @fifo: address of the fifo to be used * @sgl: pointer to the scatterlist array * @nents: number of entries in the scatterlist array * @len: number of elements to transfer + * @dma: mapped dma address to fill into @sgl * * This macro fills a scatterlist for DMA output which at most @len bytes * to transfer. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these macros. */ -#define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ +#define kfifo_dma_out_prepare_mapped(fifo, sgl, nents, len, dma) \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ @@ -775,32 +793,29 @@ __kfifo_int_must_check_helper( \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (__recsize) ? \ - __kfifo_dma_out_prepare_r(__kfifo, __sgl, __nents, __len, __recsize) : \ - __kfifo_dma_out_prepare(__kfifo, __sgl, __nents, __len); \ + __kfifo_dma_out_prepare_r(__kfifo, __sgl, __nents, __len, __recsize, \ + dma) : \ + __kfifo_dma_out_prepare(__kfifo, __sgl, __nents, __len, dma); \ }) +#define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ + kfifo_dma_out_prepare_mapped(fifo, sgl, nents, len, DMA_MAPPING_ERROR) + /** * kfifo_dma_out_finish - finish a DMA OUT operation * @fifo: address of the fifo to be used * @len: number of bytes transferred * - * This macro finish a DMA OUT operation. The out counter will be updated by + * This macro finishes a DMA OUT operation. The out counter will be updated by * the len parameter. No error checking will be done. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these macros. */ -#define kfifo_dma_out_finish(fifo, len) \ -(void)({ \ - typeof((fifo) + 1) __tmp = (fifo); \ - unsigned int __len = (len); \ - const size_t __recsize = sizeof(*__tmp->rectype); \ - struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (__recsize) \ - __kfifo_dma_out_finish_r(__kfifo, __recsize); \ - else \ - __kfifo->out += __len / sizeof(*__tmp->type); \ -}) +#define kfifo_dma_out_finish(fifo, len) do { \ + typeof((fifo) + 1) ___tmp = (fifo); \ + kfifo_skip_count(___tmp, (len) / sizeof(*___tmp->type)); \ +} while (0) /** * kfifo_out_peek - gets some data from the fifo @@ -808,7 +823,7 @@ __kfifo_int_must_check_helper( \ * @buf: pointer to the storage buffer * @n: max. number of elements to get * - * This macro get the data from the fifo and return the numbers of elements + * This macro gets the data from the fifo and returns the number of elements * copied. The data is not removed from the fifo. * * Note that with only one concurrent reader and one concurrent @@ -828,6 +843,63 @@ __kfifo_uint_must_check_helper( \ }) \ ) +/** + * kfifo_out_linear - gets a tail of/offset to available data + * @fifo: address of the fifo to be used + * @tail: pointer to an unsigned int to store the value of tail + * @n: max. number of elements to point at + * + * This macro obtains the offset (tail) to the available data in the fifo + * buffer and returns the
It returns the available count till the end + * of data or till the end of the buffer. So that it can be used for linear + * data processing (like memcpy() of (@fifo->data + @tail) with count + * returned). + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these macro. + */ +#define kfifo_out_linear(fifo, tail, n) \ +__kfifo_uint_must_check_helper( \ +({ \ + typeof((fifo) + 1) __tmp = (fifo); \ + unsigned int *__tail = (tail); \ + unsigned long __n = (n); \ + const size_t __recsize = sizeof(*__tmp->rectype); \ + struct __kfifo *__kfifo = &__tmp->kfifo; \ + (__recsize) ? \ + __kfifo_out_linear_r(__kfifo, __tail, __n, __recsize) : \ + __kfifo_out_linear(__kfifo, __tail, __n); \ +}) \ +) + +/** + * kfifo_out_linear_ptr - gets a pointer to the available data + * @fifo: address of the fifo to be used + * @ptr: pointer to data to store the pointer to tail + * @n: max. number of elements to point at + * + * Similarly to kfifo_out_linear(), this macro obtains the pointer to the + * available data in the fifo buffer and returns the numbers of elements + * available. It returns the available count till the end of available data or + * till the end of the buffer. So that it can be used for linear data + * processing (like memcpy() of @ptr with count returned). + * + * Note that with only one concurrent reader and one concurrent + * writer, you don't need extra locking to use these macro. + */ +#define kfifo_out_linear_ptr(fifo, ptr, n) \ +__kfifo_uint_must_check_helper( \ +({ \ + typeof((fifo) + 1) ___tmp = (fifo); \ + unsigned int ___tail; \ + unsigned int ___n = kfifo_out_linear(___tmp, &___tail, (n)); \ + *(ptr) = ___tmp->kfifo.data + ___tail * kfifo_esize(___tmp); \ + ___n; \ +}) \ +) + + extern int __kfifo_alloc(struct __kfifo *fifo, unsigned int size, size_t esize, gfp_t gfp_mask); @@ -849,14 +921,17 @@ extern int __kfifo_to_user(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied); extern unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len); + struct scatterlist *sgl, int nents, unsigned int len, dma_addr_t dma); extern unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len); + struct scatterlist *sgl, int nents, unsigned int len, dma_addr_t dma); extern unsigned int __kfifo_out_peek(struct __kfifo *fifo, void *buf, unsigned int len); +extern unsigned int __kfifo_out_linear(struct __kfifo *fifo, + unsigned int *tail, unsigned int n); + extern unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, unsigned int len, size_t recsize); @@ -871,15 +946,15 @@ extern int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied, size_t recsize); extern unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len, size_t recsize); + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, + dma_addr_t dma); extern void __kfifo_dma_in_finish_r(struct __kfifo *fifo, unsigned int len, size_t recsize); extern unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo, - struct scatterlist *sgl, int nents, unsigned int len, size_t recsize); - -extern void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize); + struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, + dma_addr_t dma); extern unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize); @@ 
-888,6 +963,9 @@ extern void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize); extern unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize); +extern unsigned int __kfifo_out_linear_r(struct __kfifo *fifo, + unsigned int *tail, unsigned int n, size_t recsize); + extern unsigned int __kfifo_max_r(unsigned int len, size_t recsize); #endif diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h index c4cae333deec..e1082dc40abc 100644 --- a/include/linux/kmsan-checks.h +++ b/include/linux/kmsan-checks.h @@ -61,6 +61,17 @@ void kmsan_check_memory(const void *address, size_t size); void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy, size_t left); +/** + * kmsan_memmove() - Notify KMSAN about a data copy within kernel. + * @to: destination address in the kernel. + * @from: source address in the kernel. + * @to_copy: number of bytes to copy. + * + * Invoked after non-instrumented version (e.g. implemented using assembly + * code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata. + */ +void kmsan_memmove(void *to, const void *from, size_t to_copy); + #else static inline void kmsan_poison_memory(const void *address, size_t size, @@ -78,6 +89,10 @@ static inline void kmsan_copy_to_user(void __user *to, const void *from, { } +static inline void kmsan_memmove(void *to, const void *from, size_t to_copy) +{ +} + #endif #endif /* _LINUX_KMSAN_CHECKS_H */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index c30affcc43b4..c8219505a79f 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -38,7 +38,7 @@ extern char uevent_helper[]; #endif /* counter to tag the uevent, read only except for the kobject core */ -extern u64 uevent_seqnum; +extern atomic64_t uevent_seqnum; /* * The actions here must match the index to the string array diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 0ff44d6633e3..5fcbc254d186 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -378,11 +378,15 @@ static inline void wait_for_kprobe_optimizer(void) { } extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); +/* Set when ftrace has been killed: kprobes on ftrace must be disabled for safety */ +extern bool kprobe_ftrace_disabled __read_mostly; +extern void kprobe_ftrace_kill(void); #else static inline int arch_prepare_kprobe_ftrace(struct kprobe *p) { return -EINVAL; } +static inline void kprobe_ftrace_kill(void) {} #endif /* CONFIG_KPROBES_ON_FTRACE */ /* Get the kprobe at this addr (if any) - called with preemption disabled */ @@ -495,6 +499,9 @@ static inline void kprobe_flush_task(struct task_struct *tk) static inline void kprobe_free_init_mem(void) { } +static inline void kprobe_ftrace_kill(void) +{ +} static inline int disable_kprobe(struct kprobe *kp) { return -EOPNOTSUPP; diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 401348e9f92b..52c63a9c5a9c 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -45,16 +45,16 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) { - int ret; + if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) + return __ksm_enter(mm); - if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) { - ret = __ksm_enter(mm); - if (ret) - return ret; - } + return 0; +} - if (test_bit(MMF_VM_MERGE_ANY,
&oldmm->flags)) - set_bit(MMF_VM_MERGE_ANY, &mm->flags); +static inline int ksm_execve(struct mm_struct *mm) +{ + if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) + return __ksm_enter(mm); return 0; } @@ -81,15 +81,9 @@ struct folio *ksm_might_need_to_copy(struct folio *folio, void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc); void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); - -#ifdef CONFIG_MEMORY_FAILURE -void collect_procs_ksm(struct page *page, struct list_head *to_kill, - int force_early); -#endif - -#ifdef CONFIG_PROC_FS +void collect_procs_ksm(struct folio *folio, struct page *page, + struct list_head *to_kill, int force_early); long ksm_process_profit(struct mm_struct *); -#endif /* CONFIG_PROC_FS */ #else /* !CONFIG_KSM */ @@ -107,6 +101,11 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) return 0; } +static inline int ksm_execve(struct mm_struct *mm) +{ + return 0; +} + static inline void ksm_exit(struct mm_struct *mm) { } @@ -115,12 +114,10 @@ static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte) { } -#ifdef CONFIG_MEMORY_FAILURE -static inline void collect_procs_ksm(struct page *page, +static inline void collect_procs_ksm(struct folio *folio, struct page *page, struct list_head *to_kill, int force_early) { } -#endif #ifdef CONFIG_MMU static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 179df96b20f8..692c01e41a18 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -148,6 +148,11 @@ static inline bool kvm_is_error_hva(unsigned long addr) #endif +static inline bool kvm_is_error_gpa(gpa_t gpa) +{ + return gpa == INVALID_GPA; +} + #define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) static inline bool is_error_page(struct page *page) @@ -188,8 +193,6 @@ static inline bool is_error_page(struct page *page) bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); -bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req, - struct kvm_vcpu *except); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -238,7 +241,6 @@ struct kvm_async_pf { struct list_head link; struct list_head queue; struct kvm_vcpu *vcpu; - struct mm_struct *mm; gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; @@ -255,7 +257,6 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { - pte_t pte; unsigned long attributes; }; @@ -269,7 +270,6 @@ struct kvm_gfn_range { bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range); #endif enum { @@ -1319,21 +1319,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); * * @gpc: struct gfn_to_pfn_cache object. * @kvm: pointer to kvm instance. - * @vcpu: vCPU to be used for marking pages dirty and to be woken on - * invalidation. 
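With the vcpu/usage arguments gone, a read path against a pfn cache reduces to the check/refresh loop documented below. A sketch, assuming the signature of kvm_gpc_check() takes (gpc, len); my_read_cached is hypothetical:

static int my_read_cached(struct gfn_to_pfn_cache *gpc, unsigned long len)
{
	read_lock(&gpc->lock);
	while (!kvm_gpc_check(gpc, len)) {
		read_unlock(&gpc->lock);
		if (kvm_gpc_refresh(gpc, len))
			return -EFAULT;
		read_lock(&gpc->lock);
	}
	/* gpc->khva is a valid kernel mapping of the cached page here */
	read_unlock(&gpc->lock);
	return 0;
}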
- * @usage: indicates if the resulting host physical PFN is used while - * the @vcpu is IN_GUEST_MODE (in which case invalidation of - * the cache from MMU notifiers---but not for KVM memslot - * changes!---will also force @vcpu to exit the guest and - * refresh the cache); and/or if the PFN used directly - * by KVM (and thus needs a kernel virtual mapping). * * This sets up a gfn_to_pfn_cache by initializing locks and assigning the * immutable attributes. Note, the cache must be zero-allocated (or zeroed by * the caller before init). */ -void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, - struct kvm_vcpu *vcpu, enum pfn_cache_usage usage); +void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm); /** * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest @@ -1354,6 +1345,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm, int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** + * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA. + * + * @gpc: struct gfn_to_pfn_cache object. + * @hva: userspace virtual address to map. + * @len: sanity check; the range being accessed must fit a single page. + * + * @return: 0 for success. + * -EINVAL for a mapping which would cross a page boundary. + * -EFAULT for an untranslatable guest physical address. + * + * The semantics of this function are the same as those of kvm_gpc_activate(). It + * merely bypasses a layer of address translation. + */ +int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len); + +/** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. @@ -1399,6 +1406,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); */ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); +static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && !kvm_is_error_gpa(gpc->gpa); +} + +static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc) +{ + return gpc->active && kvm_is_error_gpa(gpc->gpa); +} + void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); @@ -1505,9 +1522,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); +bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); -int kvm_arch_create_vm_debugfs(struct kvm *kvm); +void kvm_arch_create_vm_debugfs(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* @@ -1788,11 +1806,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) return (hpa_t)pfn << PAGE_SHIFT; } -static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) +static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); - return kvm_is_error_hva(hva); + return !kvm_is_error_hva(hva); +} + +static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc) +{ + lockdep_assert_held(&gpc->lock); + + if (!gpc->memslot) + return; + + mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa)); } enum kvm_stat_kind { diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 9d1f7835d8c1..827ecc0b7e10 100644 --- a/include/linux/kvm_types.h +++
b/include/linux/kvm_types.h @@ -49,12 +49,6 @@ typedef u64 hfn_t; typedef hfn_t kvm_pfn_t; -enum pfn_cache_usage { - KVM_GUEST_USES_PFN = BIT(0), - KVM_HOST_USES_PFN = BIT(1), - KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN, -}; - struct gfn_to_hva_cache { u64 generation; gpa_t gpa; @@ -69,13 +63,11 @@ struct gfn_to_pfn_cache { unsigned long uhva; struct kvm_memory_slot *memslot; struct kvm *kvm; - struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; struct mutex refresh_lock; void *khva; kvm_pfn_t pfn; - enum pfn_cache_usage usage; bool active; bool valid; }; @@ -94,6 +86,7 @@ struct gfn_to_pfn_cache { struct kvm_mmu_memory_cache { gfp_t gfp_zero; gfp_t gfp_custom; + u64 init_value; struct kmem_cache *kmem_cache; int capacity; int nobjs; diff --git a/include/linux/lcd.h b/include/linux/lcd.h index 238fb1dfed98..68703a51dc53 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -61,7 +61,7 @@ struct lcd_device { points to something in the body of that driver, it is also invalid. */ struct mutex ops_lock; /* If this is NULL, the backing module is unloaded */ - struct lcd_ops *ops; + const struct lcd_ops *ops; /* Serialise access to set_power method */ struct mutex update_lock; /* The framebuffer notifier block */ @@ -102,10 +102,10 @@ static inline void lcd_set_power(struct lcd_device *ld, int power) } extern struct lcd_device *lcd_device_register(const char *name, - struct device *parent, void *devdata, struct lcd_ops *ops); + struct device *parent, void *devdata, const struct lcd_ops *ops); extern struct lcd_device *devm_lcd_device_register(struct device *dev, const char *name, struct device *parent, - void *devdata, struct lcd_ops *ops); + void *devdata, const struct lcd_ops *ops); extern void lcd_device_unregister(struct lcd_device *ld); extern void devm_lcd_device_unregister(struct device *dev, struct lcd_device *ld); diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 612b4cab3819..36df927ec4b7 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -85,7 +85,6 @@ static inline struct led_classdev_flash *lcdev_to_flcdev( return container_of(lcdev, struct led_classdev_flash, led_cdev); } -#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) /** * led_classdev_flash_register_ext - register a new object of LED class with * init data and with support for flash LEDs @@ -116,29 +115,6 @@ int devm_led_classdev_flash_register_ext(struct device *parent, void devm_led_classdev_flash_unregister(struct device *parent, struct led_classdev_flash *fled_cdev); -#else - -static inline int led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {}; -static inline int devm_led_classdev_flash_register_ext(struct device *parent, - struct led_classdev_flash *fled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void devm_led_classdev_flash_unregister(struct device *parent, - struct led_classdev_flash *fled_cdev) -{}; - -#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */ - static inline int led_classdev_flash_register(struct device *parent, struct led_classdev_flash *fled_cdev) { diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h index 210d57bcd767..db9f34c6736e 100644 --- a/include/linux/led-class-multicolor.h +++ b/include/linux/led-class-multicolor.h @@ -30,7 +30,6 @@ static 
inline struct led_classdev_mc *lcdev_to_mccdev( return container_of(led_cdev, struct led_classdev_mc, led_cdev); } -#if IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) /** * led_classdev_multicolor_register_ext - register a new object of led_classdev * class with support for multicolor LEDs @@ -64,34 +63,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent, void devm_led_classdev_multicolor_unregister(struct device *parent, struct led_classdev_mc *mcled_cdev); -#else - -static inline int led_classdev_multicolor_register_ext(struct device *parent, - struct led_classdev_mc *mcled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {}; -static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev, - enum led_brightness brightness) -{ - return 0; -} - -static inline int devm_led_classdev_multicolor_register_ext(struct device *parent, - struct led_classdev_mc *mcled_cdev, - struct led_init_data *init_data) -{ - return 0; -} - -static inline void devm_led_classdev_multicolor_unregister(struct device *parent, - struct led_classdev_mc *mcled_cdev) -{}; - -#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */ static inline int led_classdev_multicolor_register(struct device *parent, struct led_classdev_mc *mcled_cdev) diff --git a/include/linux/leds-expresswire.h b/include/linux/leds-expresswire.h new file mode 100644 index 000000000000..a422921f4159 --- /dev/null +++ b/include/linux/leds-expresswire.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Shared library for Kinetic's ExpressWire protocol. + * This protocol works by pulsing the ExpressWire IC's control GPIO. + * ktd2692 and ktd2801 are known to use this protocol. 
+ */ + +#ifndef _LEDS_EXPRESSWIRE_H +#define _LEDS_EXPRESSWIRE_H + +#include <linux/types.h> + +struct gpio_desc; + +struct expresswire_timing { + unsigned long poweroff_us; + unsigned long detect_delay_us; + unsigned long detect_us; + unsigned long data_start_us; + unsigned long end_of_data_low_us; + unsigned long end_of_data_high_us; + unsigned long short_bitset_us; + unsigned long long_bitset_us; +}; + +struct expresswire_common_props { + struct gpio_desc *ctrl_gpio; + struct expresswire_timing timing; +}; + +void expresswire_power_off(struct expresswire_common_props *props); +void expresswire_enable(struct expresswire_common_props *props); +void expresswire_start(struct expresswire_common_props *props); +void expresswire_end(struct expresswire_common_props *props); +void expresswire_set_bit(struct expresswire_common_props *props, bool bit); +void expresswire_write_u8(struct expresswire_common_props *props, u8 val); + +#endif /* _LEDS_EXPRESSWIRE_H */ diff --git a/include/linux/leds.h b/include/linux/leds.h index 4754b02d3a2c..6300313c46b7 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -82,15 +82,7 @@ struct led_init_data { bool devname_mandatory; }; -#if IS_ENABLED(CONFIG_NEW_LEDS) enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode); -#else -static inline enum led_default_state -led_init_default_state_get(struct fwnode_handle *fwnode) -{ - return LEDS_DEFSTATE_OFF; -} -#endif struct led_hw_trigger_type { int dummy; @@ -279,20 +271,9 @@ static inline int led_classdev_register(struct device *parent, return led_classdev_register_ext(parent, led_cdev, NULL); } -#if IS_ENABLED(CONFIG_LEDS_CLASS) int devm_led_classdev_register_ext(struct device *parent, struct led_classdev *led_cdev, struct led_init_data *init_data); -#else -static inline int -devm_led_classdev_register_ext(struct device *parent, - struct led_classdev *led_cdev, - struct led_init_data *init_data) -{ - return 0; -} -#endif - static inline int devm_led_classdev_register(struct device *parent, struct led_classdev *led_cdev) { @@ -474,6 +455,9 @@ struct led_trigger { int (*activate)(struct led_classdev *led_cdev); void (*deactivate)(struct led_classdev *led_cdev); + /* Brightness set by led_trigger_event */ + enum led_brightness brightness; + /* LED-private triggers have this set */ struct led_hw_trigger_type *trigger_type; @@ -527,6 +511,12 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return led_cdev->trigger_data; } +static inline enum led_brightness +led_trigger_get_brightness(const struct led_trigger *trigger) +{ + return trigger ? 
trigger->brightness : LED_OFF; +} + #define module_led_trigger(__led_trigger) \ module_driver(__led_trigger, led_trigger_register, \ led_trigger_unregister) @@ -563,6 +553,12 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) return NULL; } +static inline enum led_brightness +led_trigger_get_brightness(const struct led_trigger *trigger) +{ + return LED_OFF; +} + #endif /* CONFIG_LEDS_TRIGGERS */ /* Trigger specific enum */ @@ -658,7 +654,7 @@ struct gpio_led_platform_data { gpio_blink_set_t gpio_blink_set; }; -#ifdef CONFIG_NEW_LEDS +#ifdef CONFIG_LEDS_GPIO_REGISTER struct platform_device *gpio_led_register_device( int id, const struct gpio_led_platform_data *pdata); #else @@ -709,18 +705,4 @@ enum led_audio { NUM_AUDIO_LEDS }; -#if IS_ENABLED(CONFIG_LEDS_TRIGGER_AUDIO) -enum led_brightness ledtrig_audio_get(enum led_audio type); -void ledtrig_audio_set(enum led_audio type, enum led_brightness state); -#else -static inline enum led_brightness ledtrig_audio_get(enum led_audio type) -{ - return LED_OFF; -} -static inline void ledtrig_audio_set(enum led_audio type, - enum led_brightness state) -{ -} -#endif - #endif /* __LINUX_LEDS_H_INCLUDED */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 26d68115afb8..13fb41d25da6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ @@ -1151,12 +1152,19 @@ extern int ata_std_bios_param(struct scsi_device *sdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); extern int ata_scsi_slave_alloc(struct scsi_device *sdev); -extern int ata_scsi_slave_config(struct scsi_device *sdev); +int ata_scsi_device_configure(struct scsi_device *sdev, + struct queue_limits *lim); extern void ata_scsi_slave_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); extern int ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev, int queue_depth); +extern int ata_ncq_prio_supported(struct ata_port *ap, struct scsi_device *sdev, + bool *supported); +extern int ata_ncq_prio_enabled(struct ata_port *ap, struct scsi_device *sdev, + bool *enabled); +extern int ata_ncq_prio_enable(struct ata_port *ap, struct scsi_device *sdev, + bool enable); extern struct ata_device *ata_dev_pair(struct ata_device *adev); extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev); extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap); @@ -1243,7 +1251,8 @@ extern struct ata_port *ata_sas_port_alloc(struct ata_host *, extern void ata_port_probe(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); -extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *); +int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, + struct ata_port *ap); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1409,13 +1418,13 @@ extern const struct attribute_group 
*ata_common_sdev_groups[]; __ATA_BASE_SHT(drv_name), \ .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .slave_configure = ata_scsi_slave_config + .device_configure = ata_scsi_device_configure #define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ __ATA_BASE_SHT(drv_name), \ .can_queue = drv_qd, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .slave_configure = ata_scsi_slave_config + .device_configure = ata_scsi_device_configure #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 287f590ed56b..d94bfd9ac8cc 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -43,29 +43,10 @@ static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1, return bitmap_andnot(dst, src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static inline void linkmode_set_bit(int nr, volatile unsigned long *addr) -{ - __set_bit(nr, addr); -} - -static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr) -{ - __clear_bit(nr, addr); -} - -static inline void linkmode_mod_bit(int nr, volatile unsigned long *addr, - int set) -{ - if (set) - linkmode_set_bit(nr, addr); - else - linkmode_clear_bit(nr, addr); -} - -static inline int linkmode_test_bit(int nr, const volatile unsigned long *addr) -{ - return test_bit(nr, addr); -} +#define linkmode_test_bit test_bit +#define linkmode_set_bit __set_bit +#define linkmode_clear_bit __clear_bit +#define linkmode_mod_bit __assign_bit static inline void linkmode_set_bit_array(const int *array, int array_size, unsigned long *addr) diff --git a/include/linux/list.h b/include/linux/list.h index 059aa1fff41e..5f4b0a39cf46 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -766,7 +766,7 @@ static inline size_t list_count_nodes(struct list_head *head) * @member: the name of the list_head within the struct. */ #define list_entry_is_head(pos, head, member) \ - (&pos->member == (head)) + list_is_head(&pos->member, (head)) /** * list_for_each_entry - iterate over list of given type @@ -1195,4 +1195,19 @@ static inline void hlist_splice_init(struct hlist_head *from, pos && ({ n = pos->member.next; 1; }); \ pos = hlist_entry_safe(n, typeof(*pos), member)) +/** + * hlist_count_nodes - count nodes in the hlist + * @head: the head for your hlist. + */ +static inline size_t hlist_count_nodes(struct hlist_head *head) +{ + struct hlist_node *pos; + size_t count = 0; + + hlist_for_each(pos, head) + count++; + + return count; +} + #endif diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 7675a48a0701..792b67ceb631 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -24,6 +24,8 @@ enum lru_status { LRU_SKIP, /* item cannot be locked, skip */ LRU_RETRY, /* item not freeable. May drop the lock internally, but has to return locked. */ + LRU_STOP, /* stop lru list walking. May drop the lock + internally, but has to return locked. 
*/ }; struct list_lru_one { @@ -62,8 +64,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, #define list_lru_init(lru) \ __list_lru_init((lru), false, NULL, NULL) -#define list_lru_init_key(lru, key) \ - __list_lru_init((lru), false, (key), NULL) #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, NULL, shrinker) @@ -170,22 +170,6 @@ static inline unsigned long list_lru_count(struct list_lru *lru) void list_lru_isolate(struct list_lru_one *list, struct list_head *item); void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head); -/** - * list_lru_putback: undo list_lru_isolate - * @lru: the lru pointer. - * @item: the item to put back. - * @nid: the node id of the sublist to put the item back to. - * @memcg: the cgroup of the sublist to put the item back to. - * - * Put back an isolated item into its original LRU. Note that unlike - * list_lru_add, this does not increment the node LRU count (as - * list_lru_isolate does not originally decrement this count). - * - * Since we might have dropped the LRU lock in between, recompute list_lru_one - * from the node's id and memcg. - */ -void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid, - struct mem_cgroup *memcg); typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, struct list_lru_one *list, spinlock_t *lock, void *cb_arg); diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 9b9b38e89563..51a258c24ff5 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -18,9 +18,9 @@ #if IS_ENABLED(CONFIG_LIVEPATCH) /* task patch states */ -#define KLP_UNDEFINED -1 -#define KLP_UNPATCHED 0 -#define KLP_PATCHED 1 +#define KLP_TRANSITION_IDLE -1 +#define KLP_TRANSITION_UNPATCHED 0 +#define KLP_TRANSITION_PATCHED 1 /** * struct klp_func - function structure for live patching diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 9f565416d186..1b95fe31051f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -375,12 +375,12 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) static inline int nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) { - return file_inode(fl1->fl_file) == file_inode(fl2->fl_file) - && fl1->fl_pid == fl2->fl_pid - && fl1->fl_owner == fl2->fl_owner + return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file) + && fl1->c.flc_pid == fl2->c.flc_pid + && fl1->c.flc_owner == fl2->c.flc_owner && fl1->fl_start == fl2->fl_start && fl1->fl_end == fl2->fl_end - &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK); + &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK); } extern const struct lock_manager_operations nlmsvc_lock_operations; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index b60fbcd8cdfa..80cca9426761 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -52,7 +52,7 @@ struct nlm_lock { * FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to * 32 bytes. 
*/ - + struct nlm_cookie { unsigned char data[NLM_MAXCOOKIELEN]; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 08b0d1d9d78b..5e51b0de4c4b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -297,6 +297,9 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); .wait_type_inner = _wait_type, \ .lock_type = LD_LOCK_WAIT_OVERRIDE, } +#define lock_map_assert_held(l) \ + lockdep_assert(lock_is_held(l) != LOCK_STATE_NOT_HELD) + #else /* !CONFIG_LOCKDEP */ static inline void lockdep_init_task(struct task_struct *task) @@ -388,6 +391,8 @@ extern int lockdep_is_held(const void *); #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ struct lockdep_map __maybe_unused _name = {} +#define lock_map_assert_held(l) do { (void)(l); } while (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_PROVE_LOCKING diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 76458b6d53da..f804b76cde44 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -94,6 +94,8 @@ LSM_HOOK(int, 0, path_mkdir, const struct path *dir, struct dentry *dentry, LSM_HOOK(int, 0, path_rmdir, const struct path *dir, struct dentry *dentry) LSM_HOOK(int, 0, path_mknod, const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev) +LSM_HOOK(void, LSM_RET_VOID, path_post_mknod, struct mnt_idmap *idmap, + struct dentry *dentry) LSM_HOOK(int, 0, path_truncate, const struct path *path) LSM_HOOK(int, 0, path_symlink, const struct path *dir, struct dentry *dentry, const char *old_name) @@ -119,6 +121,8 @@ LSM_HOOK(int, 0, inode_init_security_anon, struct inode *inode, const struct qstr *name, const struct inode *context_inode) LSM_HOOK(int, 0, inode_create, struct inode *dir, struct dentry *dentry, umode_t mode) +LSM_HOOK(void, LSM_RET_VOID, inode_post_create_tmpfile, struct mnt_idmap *idmap, + struct inode *inode) LSM_HOOK(int, 0, inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) LSM_HOOK(int, 0, inode_unlink, struct inode *dir, struct dentry *dentry) @@ -135,7 +139,10 @@ LSM_HOOK(int, 0, inode_readlink, struct dentry *dentry) LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode, bool rcu) LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask) -LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr) +LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr) +LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, + struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, @@ -146,12 +153,18 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_removexattr, struct dentry *dentry, + const char *name) LSM_HOOK(int, 0, inode_set_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) +LSM_HOOK(void, LSM_RET_VOID, inode_post_set_acl, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl) LSM_HOOK(int, 0, inode_get_acl, struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_remove_acl, struct mnt_idmap *idmap, struct 
dentry *dentry, const char *acl_name) +LSM_HOOK(void, LSM_RET_VOID, inode_post_remove_acl, struct mnt_idmap *idmap, + struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct mnt_idmap *idmap, struct dentry *dentry) @@ -163,11 +176,13 @@ LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) -LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, const char *name) +LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src, + const char *name) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) LSM_HOOK(int, 0, file_alloc_security, struct file *file) +LSM_HOOK(void, LSM_RET_VOID, file_release, struct file *file) LSM_HOOK(void, LSM_RET_VOID, file_free_security, struct file *file) LSM_HOOK(int, 0, file_ioctl, struct file *file, unsigned int cmd, unsigned long arg) @@ -186,6 +201,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, file_post_open, struct file *file, int mask) LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) @@ -265,9 +281,9 @@ LSM_HOOK(int, 0, netlink_send, struct sock *sk, struct sk_buff *skb) LSM_HOOK(void, LSM_RET_VOID, d_instantiate, struct dentry *dentry, struct inode *inode) LSM_HOOK(int, -EOPNOTSUPP, getselfattr, unsigned int attr, - struct lsm_ctx __user *ctx, size_t *size, u32 flags) + struct lsm_ctx __user *ctx, u32 *size, u32 flags) LSM_HOOK(int, -EOPNOTSUPP, setselfattr, unsigned int attr, - struct lsm_ctx *ctx, size_t size, u32 flags) + struct lsm_ctx *ctx, u32 size, u32 flags) LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) @@ -390,6 +406,9 @@ LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **buffer) +LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, + struct key *key, const void *payload, size_t payload_len, + unsigned long flags, bool create) #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -404,10 +423,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) -LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map) -LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map) -LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux) -LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) +LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) +LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +LSM_HOOK(void, LSM_RET_VOID, 
bpf_prog_free, struct bpf_prog *prog) +LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, + struct path *path) +LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token) +LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd) +LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index b3d63123b945..a53ad4dabd7e 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -171,6 +171,7 @@ enum maple_type { #define MT_FLAGS_LOCK_IRQ 0x100 #define MT_FLAGS_LOCK_BH 0x200 #define MT_FLAGS_LOCK_EXTERN 0x300 +#define MT_FLAGS_ALLOC_WRAPPED 0x0800 #define MAPLE_HEIGHT_MAX 31 @@ -319,6 +320,9 @@ int mtree_insert_range(struct maple_tree *mt, unsigned long first, int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); +int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp, void *entry, unsigned long size, unsigned long min, unsigned long max, gfp_t gfp); @@ -499,6 +503,9 @@ void *mas_find_range(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); void *mas_find_range_rev(struct ma_state *mas, unsigned long max); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp, + void *entry, unsigned long range_lo, unsigned long range_hi, + unsigned long *next, gfp_t gfp); bool mas_nomem(struct ma_state *mas, gfp_t gfp); void mas_pause(struct ma_state *mas); diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index 9b54c4f0677f..b1fbe4118414 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -7,6 +7,7 @@ /* Known PHY IDs */ #define MARVELL_PHY_ID_88E1101 0x01410c60 +#define MARVELL_PHY_ID_88E3082 0x01410c80 #define MARVELL_PHY_ID_88E1112 0x01410c90 #define MARVELL_PHY_ID_88E1111 0x01410cc0 #define MARVELL_PHY_ID_88E1118 0x01410e10 @@ -26,10 +27,13 @@ #define MARVELL_PHY_ID_88E2110 0x002b09b0 #define MARVELL_PHY_ID_88X2222 0x01410f10 #define MARVELL_PHY_ID_88Q2110 0x002b0980 +#define MARVELL_PHY_ID_88Q2220 0x002b0b20 /* Marvel 88E1111 in Finisar SFP module with modified PHY ID */ #define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0 +/* ID from 88E6020, assumed to be the same for the whole 6250 family */ +#define MARVELL_PHY_ID_88E6250_FAMILY 0x01410db0 /* These Ethernet switch families contain embedded PHYs, but they do * not have a model ID. 
So the switch driver traps reads to the ID2 * register and returns the switch family ID diff --git a/include/linux/math64.h b/include/linux/math64.h index bf74478926d4..d34def7f9a8c 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -4,8 +4,8 @@ #include <linux/types.h> #include <linux/math.h> -#include <vdso/math64.h> #include <asm/div64.h> +#include <vdso/math64.h> #if BITS_PER_LONG == 64 @@ -179,16 +179,12 @@ static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) #ifndef mul_u64_u32_shr static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) { - u32 ah, al; + u32 ah = a >> 32, al = a; u64 ret; - al = a; - ah = a >> 32; - ret = mul_u32_u32(al, mul) >> shift; if (ah) ret += mul_u32_u32(ah, mul) << (32 - shift); - return ret; } #endif /* mul_u64_u32_shr */ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 79ceee3c8673..68f8d2e970d4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -373,6 +373,10 @@ static inline void mii_t1_adv_m_mod_linkmode_t(unsigned long *advertising, u32 l { linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising, lpa & MDIO_AN_T1_ADV_M_B10L); + linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_100BT1); + linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, + advertising, lpa & MDIO_AN_T1_ADV_M_1000BT1); } /** @@ -409,6 +413,10 @@ static inline u32 linkmode_adv_to_mii_t1_adv_m_t(unsigned long *advertising) if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, advertising)) result |= MDIO_AN_T1_ADV_M_B10L; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_100BT1; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT1_Full_BIT, advertising)) + result |= MDIO_AN_T1_ADV_M_1000BT1; return result; } @@ -440,6 +448,42 @@ static inline void mii_eee_cap1_mod_linkmode_t(unsigned long *adv, u32 val) } /** + * mii_eee_cap2_mod_linkmode_sup_t() + * @adv: target the linkmode settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.3.11 "EEE control and capability 2" register (3.21) + */ +static inline void mii_eee_cap2_mod_linkmode_sup_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** + * mii_eee_cap2_mod_linkmode_adv_t() + * @adv: target the linkmode advertisement settings + * @val: register value + * + * A function that translates value of following registers to the linkmode: + * IEEE 802.3-2022 45.2.7.16 "EEE advertisement 2" register (7.62) + * IEEE 802.3-2022 45.2.7.17 "EEE link partner ability 2" register (7.63) + * Note: Currently this function is the same as mii_eee_cap2_mod_linkmode_sup_t. + * For certain, not yet supported, modes however the bits differ. + * Therefore create separate functions already. 
+ */ +static inline void mii_eee_cap2_mod_linkmode_adv_t(unsigned long *adv, u32 val) +{ + linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, + adv, val & MDIO_EEE_2_5GT); + linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, + adv, val & MDIO_EEE_5GT); +} + +/** * linkmode_to_mii_eee_cap1_t() * @adv: the linkmode advertisement settings * @@ -467,6 +511,25 @@ static inline u32 linkmode_to_mii_eee_cap1_t(unsigned long *adv) } /** + * linkmode_to_mii_eee_cap2_t() + * @adv: the linkmode advertisement settings + * + * A function that translates linkmode to value for IEEE 802.3-2022 45.2.7.16 + * "EEE advertisement 2" register (7.62) + */ +static inline u32 linkmode_to_mii_eee_cap2_t(unsigned long *adv) +{ + u32 result = 0; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, adv)) + result |= MDIO_EEE_2_5GT; + if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT, adv)) + result |= MDIO_EEE_5GT; + + return result; +} + +/** * mii_10base_t1_adv_mod_linkmode_t() * @adv: linkmode advertisement settings * @val: register value diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 20ff87f8e001..030d34e9d117 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -14,6 +14,7 @@ #include <linux/vm_event_item.h> #include <linux/hardirq.h> #include <linux/jump_label.h> +#include <linux/kernel.h> #include <linux/page_counter.h> #include <linux/vmpressure.h> #include <linux/eventfd.h> @@ -82,6 +83,8 @@ enum mem_cgroup_events_target { struct memcg_vmstats_percpu; struct memcg_vmstats; +struct lruvec_stats_percpu; +struct lruvec_stats; struct mem_cgroup_reclaim_iter { struct mem_cgroup *position; @@ -89,25 +92,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -struct lruvec_stats_percpu { - /* Local (CPU and cgroup) state */ - long state[NR_VM_NODE_STAT_ITEMS]; - - /* Delta calculation for lockless upward propagation */ - long state_prev[NR_VM_NODE_STAT_ITEMS]; -}; - -struct lruvec_stats { - /* Aggregated (CPU and subtree) state */ - long state[NR_VM_NODE_STAT_ITEMS]; - - /* Non-hierarchical (CPU aggregated) state */ - long state_local[NR_VM_NODE_STAT_ITEMS]; - - /* Pending child counts during tree propagation */ - long state_pending[NR_VM_NODE_STAT_ITEMS]; -}; - /* * per-node information in memory controller. 
*/ @@ -115,7 +99,7 @@ struct mem_cgroup_per_node { struct lruvec lruvec; struct lruvec_stats_percpu __percpu *lruvec_stats_percpu; - struct lruvec_stats lruvec_stats; + struct lruvec_stats *lruvec_stats; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; @@ -348,15 +332,32 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; enum page_memcg_data_flags { - /* page->memcg_data is a pointer to an objcgs vector */ - MEMCG_DATA_OBJCGS = (1UL << 0), + /* page->memcg_data is a pointer to an slabobj_ext vector */ + MEMCG_DATA_OBJEXTS = (1UL << 0), /* page has been accounted as a non-slab kernel page */ MEMCG_DATA_KMEM = (1UL << 1), /* the next bit after the last actual flag */ __NR_MEMCG_DATA_FLAGS = (1UL << 2), }; -#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) +#define __FIRST_OBJEXT_FLAG __NR_MEMCG_DATA_FLAGS + +#else /* CONFIG_MEMCG */ + +#define __FIRST_OBJEXT_FLAG (1UL << 0) + +#endif /* CONFIG_MEMCG */ + +enum objext_flags { + /* slabobj_ext vector failed to allocate */ + OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG, + /* the next bit after the last actual flag */ + __NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1), +}; + +#define OBJEXTS_FLAGS_MASK (__NR_OBJEXTS_FLAGS - 1) + +#ifdef CONFIG_MEMCG static inline bool folio_memcg_kmem(struct folio *folio); @@ -387,10 +388,10 @@ static inline struct mem_cgroup *__folio_memcg(struct folio *folio) unsigned long memcg_data = folio->memcg_data; VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio); VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio); - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -408,10 +409,10 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) unsigned long memcg_data = folio->memcg_data; VM_BUG_ON_FOLIO(folio_test_slab(folio), folio); - VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJEXTS, folio); VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio); - return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct obj_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -468,11 +469,11 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; - objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); return obj_cgroup_memcg(objcg); } - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } /* @@ -505,17 +506,17 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) */ unsigned long memcg_data = READ_ONCE(folio->memcg_data); - if (memcg_data & MEMCG_DATA_OBJCGS) + if (memcg_data & MEMCG_DATA_OBJEXTS) return NULL; if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg; - objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + objcg = (void *)(memcg_data & ~OBJEXTS_FLAGS_MASK); return obj_cgroup_memcg(objcg); } - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return (struct mem_cgroup *)(memcg_data & ~OBJEXTS_FLAGS_MASK); } static inline struct mem_cgroup *page_memcg_check(struct page *page) @@ -551,7 +552,7 @@ retry: static inline bool folio_memcg_kmem(struct folio *folio) { VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page); - 
VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio); + VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJEXTS, folio); return folio->memcg_data & MEMCG_DATA_KMEM; } @@ -712,18 +713,16 @@ static inline void mem_cgroup_uncharge(struct folio *folio) __mem_cgroup_uncharge(folio); } -void __mem_cgroup_uncharge_list(struct list_head *page_list); -static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +void __mem_cgroup_uncharge_folios(struct folio_batch *folios); +static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { if (mem_cgroup_disabled()) return; - __mem_cgroup_uncharge_list(page_list); + __mem_cgroup_uncharge_folios(folios); } void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); - void mem_cgroup_replace_folio(struct folio *old, struct folio *new); - void mem_cgroup_migrate(struct folio *old, struct folio *new); /** @@ -819,7 +818,8 @@ static inline void obj_cgroup_get_many(struct obj_cgroup *objcg, static inline void obj_cgroup_put(struct obj_cgroup *objcg) { - percpu_ref_put(&objcg->refcnt); + if (objcg) + percpu_ref_put(&objcg->refcnt); } static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) @@ -974,7 +974,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); void folio_memcg_lock(struct folio *folio); void folio_memcg_unlock(struct folio *folio); -void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, + int val); /* try to stablize folio_memcg() for all the pages in a memcg */ static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) @@ -995,7 +996,7 @@ static inline void mem_cgroup_unlock_pages(void) /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, - int idx, int val) + enum memcg_stat_item idx, int val) { unsigned long flags; @@ -1005,7 +1006,7 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, } static inline void mod_memcg_page_state(struct page *page, - int idx, int val) + enum memcg_stat_item idx, int val) { struct mem_cgroup *memcg; @@ -1020,48 +1021,13 @@ static inline void mod_memcg_page_state(struct page *page, } unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx); - -static inline unsigned long lruvec_page_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - struct mem_cgroup_per_node *pn; - long x; - - if (mem_cgroup_disabled()) - return node_page_state(lruvec_pgdat(lruvec), idx); - - pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - x = READ_ONCE(pn->lruvec_stats.state[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} - -static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, - enum node_stat_item idx) -{ - struct mem_cgroup_per_node *pn; - long x = 0; - - if (mem_cgroup_disabled()) - return node_page_state(lruvec_pgdat(lruvec), idx); - - pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - x = READ_ONCE(pn->lruvec_stats.state_local[idx]); -#ifdef CONFIG_SMP - if (x < 0) - x = 0; -#endif - return x; -} +unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx); +unsigned long lruvec_page_state_local(struct lruvec *lruvec, + enum node_stat_item idx); void mem_cgroup_flush_stats(struct mem_cgroup *memcg); void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg); -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val); void 
__mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val); static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, @@ -1074,16 +1040,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, local_irq_restore(flags); } -static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_memcg_lruvec_state(lruvec, idx, val); - local_irq_restore(flags); -} - void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count); @@ -1162,7 +1118,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -void split_page_memcg(struct page *head, unsigned int nr); +void split_page_memcg(struct page *head, int old_order, int new_order); unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1294,7 +1250,7 @@ static inline void mem_cgroup_uncharge(struct folio *folio) { } -static inline void mem_cgroup_uncharge_list(struct list_head *page_list) +static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { } @@ -1536,19 +1492,19 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } static inline void __mod_memcg_state(struct mem_cgroup *memcg, - int idx, + enum memcg_stat_item idx, int nr) { } static inline void mod_memcg_state(struct mem_cgroup *memcg, - int idx, + enum memcg_stat_item idx, int nr) { } static inline void mod_memcg_page_state(struct page *page, - int idx, int val) + enum memcg_stat_item idx, int val) { } @@ -1577,11 +1533,6 @@ static inline void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg) { } -static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ -} - static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val) { @@ -1620,7 +1571,7 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void split_page_memcg(struct page *head, unsigned int nr) +static inline void split_page_memcg(struct page *head, int old_order, int new_order) { } @@ -1633,6 +1584,19 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, } #endif /* CONFIG_MEMCG */ +/* + * Extended information for slab objects stored as an array in page->memcg_data + * if MEMCG_DATA_OBJEXTS is set. 
+ */ +struct slabobj_ext { +#ifdef CONFIG_MEMCG_KMEM + struct obj_cgroup *objcg; +#endif +#ifdef CONFIG_MEM_ALLOC_PROFILING + union codetag_ref ref; +#endif +} __aligned(8); + static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx) { __mod_lruvec_kmem_state(p, idx, 1); @@ -1694,18 +1658,18 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio, return folio_lruvec_lock_irq(folio); } -/* Don't lock again iff page's lruvec locked */ -static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio, - struct lruvec *locked_lruvec, unsigned long *flags) +/* Don't lock again iff folio's lruvec locked */ +static inline void folio_lruvec_relock_irqsave(struct folio *folio, + struct lruvec **lruvecp, unsigned long *flags) { - if (locked_lruvec) { - if (folio_matches_lruvec(folio, locked_lruvec)) - return locked_lruvec; + if (*lruvecp) { + if (folio_matches_lruvec(folio, *lruvecp)) + return; - unlock_page_lruvec_irqrestore(locked_lruvec, *flags); + unlock_page_lruvec_irqrestore(*lruvecp, *flags); } - return folio_lruvec_lock_irqsave(folio, flags); + *lruvecp = folio_lruvec_lock_irqsave(folio, flags); } #ifdef CONFIG_CGROUP_WRITEBACK diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 69e781900082..0d70788558f4 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -48,6 +48,9 @@ int mt_calc_adistance(int node, int *adist); int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source); int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); +struct memory_dev_type *mt_find_alloc_memory_type(int adist, + struct list_head *memory_types); +void mt_put_memory_types(struct list_head *memory_types); #ifdef CONFIG_MIGRATION int next_demotion_node(int node); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -136,5 +139,15 @@ static inline int mt_perf_to_adistance(struct access_coordinate *perf, int *adis { return -EIO; } + +static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist, + struct list_head *memory_types) +{ + return NULL; +} + +static inline void mt_put_memory_types(struct list_head *memory_types) +{ +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index f53cfdaaaa41..c0afee5d126e 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -96,8 +96,17 @@ int set_memory_block_size_order(unsigned int order); #define MEM_GOING_ONLINE (1<<3) #define MEM_CANCEL_ONLINE (1<<4) #define MEM_CANCEL_OFFLINE (1<<5) +#define MEM_PREPARE_ONLINE (1<<6) +#define MEM_FINISH_OFFLINE (1<<7) struct memory_notify { + /* + * The altmap_start_pfn and altmap_nr_pages fields are designated for + * specifying the altmap range and are exclusively intended for use in + * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers. 
+ */ + unsigned long altmap_start_pfn; + unsigned long altmap_nr_pages; unsigned long start_pfn; unsigned long nr_pages; int status_change_nid_normal; @@ -114,6 +123,7 @@ struct mem_section; #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 #define HMAT_CALLBACK_PRI 2 +#define CXL_CALLBACK_PRI 5 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7d2076583494..7a9ff464608d 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -106,6 +106,22 @@ typedef int __bitwise mhp_t; * implies the node id (nid). */ #define MHP_NID_IS_MGID ((__force mhp_t)BIT(2)) +/* + * The hotplugged memory is completely inaccessible while the memory is + * offline. The memory provider will handle MEM_PREPARE_ONLINE / + * MEM_FINISH_OFFLINE notifications and make the memory accessible. + * + * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY, + * because the altmap cannot be written (e.g., poisoned) when adding + * memory -- before it is set online. + * + * This allows for adding memory with an altmap that is not currently + * made available by a hypervisor. When onlining that memory, the + * hypervisor can be instructed to make that memory available, and + * the onlining phase will not require any memory allocations, which is + * helpful in low-memory situations. + */ +#define MHP_OFFLINE_INACCESSIBLE ((__force mhp_t)BIT(3)) /* * Extended parameters for memory hotplug: @@ -121,6 +137,7 @@ struct mhp_params { bool mhp_range_allowed(u64 start, u64 size, bool need_mapping); struct range mhp_get_pluggable_range(bool need_mapping); +bool mhp_supports_memmap_on_memory(void); /* * Zone resizing functions @@ -154,7 +171,7 @@ extern void adjust_present_page_count(struct page *page, long nr_pages); /* VM interface that may be used by firmware interface */ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, - struct zone *zone); + struct zone *zone, bool mhp_off_inaccessible); extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); @@ -262,6 +279,11 @@ static inline bool movable_node_is_enabled(void) return false; } +static inline bool mhp_supports_memmap_on_memory(void) +{ + return false; +} + static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {} static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {} diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 931b118336f4..1add16f21612 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -167,7 +167,8 @@ extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); -int mpol_misplaced(struct folio *, struct vm_area_struct *, unsigned long); +int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, + unsigned long addr); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) { @@ -282,7 +283,7 @@ static inline int mpol_parse_str(char *str, struct mempolicy **mpol) #endif static inline int mpol_misplaced(struct folio *folio, - struct vm_area_struct *vma, + struct vm_fault *vmf, unsigned long address) { return -1; /* no node preference */ }
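The MEM_PREPARE_ONLINE / MEM_FINISH_OFFLINE notifications and the altmap_start_pfn/altmap_nr_pages fields added above let a memory provider expose a range, including its altmap, only while the memory is online. A sketch of a hypothetical provider follows; make_range_accessible() and make_range_inaccessible() are invented stand-ins for provider-specific calls, not real kernel APIs.

/* Hypothetical provider hooking the new notifications. */
static int provider_memory_notifier(struct notifier_block *nb,
				    unsigned long action, void *arg)
{
	struct memory_notify *mhp = arg;

	switch (action) {
	case MEM_PREPARE_ONLINE:
		/* Expose the altmap and the range itself before onlining. */
		make_range_accessible(mhp->altmap_start_pfn, mhp->altmap_nr_pages);
		make_range_accessible(mhp->start_pfn, mhp->nr_pages);
		break;
	case MEM_FINISH_OFFLINE:
		make_range_inaccessible(mhp->altmap_start_pfn, mhp->altmap_nr_pages);
		make_range_inaccessible(mhp->start_pfn, mhp->nr_pages);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block provider_nb = {
	.notifier_call = provider_memory_notifier,
	.priority = CXL_CALLBACK_PRI,	/* one of the priorities defined above */
};

Such a provider would add the memory with MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE and register the block via register_memory_notifier(&provider_nb).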
diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 7be1e32e6d42..7b151441341b 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -5,6 +5,8 @@ #ifndef _LINUX_MEMPOOL_H #define _LINUX_MEMPOOL_H +#include <linux/sched.h> +#include <linux/alloc_tag.h> #include <linux/wait.h> #include <linux/compiler.h> @@ -39,18 +41,32 @@ void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id); -int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, + +int mempool_init_noprof(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); +#define mempool_init(...) \ + alloc_hooks(mempool_init_noprof(__VA_ARGS__)) extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data); -extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, + +extern mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int nid); +#define mempool_create_node(...) \ + alloc_hooks(mempool_create_node_noprof(__VA_ARGS__)) + +#define mempool_create(_min_nr, _alloc_fn, _free_fn, _pool_data) \ + mempool_create_node(_min_nr, _alloc_fn, _free_fn, _pool_data, \ + GFP_KERNEL, NUMA_NO_NODE) extern int mempool_resize(mempool_t *pool, int new_min_nr); extern void mempool_destroy(mempool_t *pool); -extern void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc; + +extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc; +#define mempool_alloc(...) \ + alloc_hooks(mempool_alloc_noprof(__VA_ARGS__)) + extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc; extern void mempool_free(void *element, mempool_t *pool); @@ -62,19 +78,10 @@ extern void mempool_free(void *element, mempool_t *pool); void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data); void mempool_free_slab(void *element, void *pool_data); -static inline int -mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) -{ - return mempool_init(pool, min_nr, mempool_alloc_slab, - mempool_free_slab, (void *) kc); -} - -static inline mempool_t * -mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) -{ - return mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab, - (void *) kc); -} +#define mempool_init_slab_pool(_pool, _min_nr, _kc) \ + mempool_init(_pool, (_min_nr), mempool_alloc_slab, mempool_free_slab, (void *)(_kc)) +#define mempool_create_slab_pool(_min_nr, _kc) \ + mempool_create((_min_nr), mempool_alloc_slab, mempool_free_slab, (void *)(_kc)) /* * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the @@ -83,16 +90,24 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); -static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) +#define mempool_init_kmalloc_pool(_pool, _min_nr, _size) \ + mempool_init(_pool, (_min_nr), mempool_kmalloc, mempool_kfree, \ + (void *)(unsigned long)(_size)) +#define mempool_create_kmalloc_pool(_min_nr, _size) \ + mempool_create((_min_nr), mempool_kmalloc, mempool_kfree, \ + (void *)(unsigned long)(_size)) + +void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data); +void mempool_kvfree(void *element, void *pool_data); + +static inline int mempool_init_kvmalloc_pool(mempool_t *pool,
int min_nr, size_t size) { - return mempool_init(pool, min_nr, mempool_kmalloc, - mempool_kfree, (void *) size); + return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); } -static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) +static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size) { - return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, - (void *) size); + return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size); } /* @@ -102,16 +117,11 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data); void mempool_free_pages(void *element, void *pool_data); -static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) -{ - return mempool_init(pool, min_nr, mempool_alloc_pages, - mempool_free_pages, (void *)(long)order); -} - -static inline mempool_t *mempool_create_page_pool(int min_nr, int order) -{ - return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages, - (void *)(long)order); -} +#define mempool_init_page_pool(_pool, _min_nr, _order) \ + mempool_init(_pool, (_min_nr), mempool_alloc_pages, \ + mempool_free_pages, (void *)(long)(_order)) +#define mempool_create_page_pool(_min_nr, _order) \ + mempool_create((_min_nr), mempool_alloc_pages, \ + mempool_free_pages, (void *)(long)(_order)) #endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 744c830f4b13..3f7143ade32c 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -25,6 +25,7 @@ struct vmem_altmap { unsigned long free; unsigned long align; unsigned long alloc; + bool inaccessible; }; /* @@ -108,7 +109,7 @@ struct dev_pagemap_ops { * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @done: completion for @ref - * @type: memory type: see MEMORY_* in memory_hotplug.h + * @type: memory type: see MEMORY_* above in memremap.h * @flags: PGMAP_* flags to specify detailed behavior * @vmemmap_shift: structural definition of how the vmemmap page metadata * is populated, specifically the metadata page order.
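The mempool rework above turns the constructors into alloc_hooks() wrappers, so allocations can be attributed when allocation profiling is enabled, and adds kvmalloc-backed pools. A minimal usage sketch of the new kvmalloc pool follows; the pool parameters and names are invented for the example.

/* Illustrative only: a pool guaranteeing 16 preallocated 4 KiB buffers. */
static mempool_t *buf_pool;

static int buf_pool_setup(void)
{
	buf_pool = mempool_create_kvmalloc_pool(16, 4096);
	return buf_pool ? 0 : -ENOMEM;
}

static void buf_pool_demo(void)
{
	/* With GFP_KERNEL this sleeps rather than fails once the reserve
	 * is exhausted, so the allocation is effectively guaranteed. */
	void *buf = mempool_alloc(buf_pool, GFP_KERNEL);

	/* ... use buf ... */
	mempool_free(buf, buf_pool);
}

static void buf_pool_teardown(void)
{
	mempool_destroy(buf_pool);
}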
diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index f1755163dd9f..8c0a33a2e9ce 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -19,6 +19,7 @@ enum axp20x_variants { AXP223_ID, AXP288_ID, AXP313A_ID, + AXP717_ID, AXP803_ID, AXP806_ID, AXP809_ID, @@ -104,15 +105,47 @@ enum axp20x_variants { #define AXP313A_ON_INDICATE 0x00 #define AXP313A_OUTPUT_CONTROL 0x10 -#define AXP313A_DCDC1_CONRTOL 0x13 -#define AXP313A_DCDC2_CONRTOL 0x14 -#define AXP313A_DCDC3_CONRTOL 0x15 -#define AXP313A_ALDO1_CONRTOL 0x16 -#define AXP313A_DLDO1_CONRTOL 0x17 +#define AXP313A_DCDC1_CONTROL 0x13 +#define AXP313A_DCDC2_CONTROL 0x14 +#define AXP313A_DCDC3_CONTROL 0x15 +#define AXP313A_ALDO1_CONTROL 0x16 +#define AXP313A_DLDO1_CONTROL 0x17 #define AXP313A_SHUTDOWN_CTRL 0x1a #define AXP313A_IRQ_EN 0x20 #define AXP313A_IRQ_STATE 0x21 +#define AXP717_ON_INDICATE 0x00 +#define AXP717_IRQ0_EN 0x40 +#define AXP717_IRQ1_EN 0x41 +#define AXP717_IRQ2_EN 0x42 +#define AXP717_IRQ3_EN 0x43 +#define AXP717_IRQ4_EN 0x44 +#define AXP717_IRQ0_STATE 0x48 +#define AXP717_IRQ1_STATE 0x49 +#define AXP717_IRQ2_STATE 0x4a +#define AXP717_IRQ3_STATE 0x4b +#define AXP717_IRQ4_STATE 0x4c +#define AXP717_DCDC_OUTPUT_CONTROL 0x80 +#define AXP717_DCDC1_CONTROL 0x83 +#define AXP717_DCDC2_CONTROL 0x84 +#define AXP717_DCDC3_CONTROL 0x85 +#define AXP717_DCDC4_CONTROL 0x86 +#define AXP717_LDO0_OUTPUT_CONTROL 0x90 +#define AXP717_LDO1_OUTPUT_CONTROL 0x91 +#define AXP717_ALDO1_CONTROL 0x93 +#define AXP717_ALDO2_CONTROL 0x94 +#define AXP717_ALDO3_CONTROL 0x95 +#define AXP717_ALDO4_CONTROL 0x96 +#define AXP717_BLDO1_CONTROL 0x97 +#define AXP717_BLDO2_CONTROL 0x98 +#define AXP717_BLDO3_CONTROL 0x99 +#define AXP717_BLDO4_CONTROL 0x9a +#define AXP717_CLDO1_CONTROL 0x9b +#define AXP717_CLDO2_CONTROL 0x9c +#define AXP717_CLDO3_CONTROL 0x9d +#define AXP717_CLDO4_CONTROL 0x9e +#define AXP717_CPUSLDO_CONTROL 0x9f + #define AXP806_STARTUP_SRC 0x00 #define AXP806_CHIP_ID 0x03 #define AXP806_PWR_OUT_CTRL1 0x10 @@ -434,6 +467,27 @@ enum { }; enum { + AXP717_DCDC1 = 0, + AXP717_DCDC2, + AXP717_DCDC3, + AXP717_DCDC4, + AXP717_ALDO1, + AXP717_ALDO2, + AXP717_ALDO3, + AXP717_ALDO4, + AXP717_BLDO1, + AXP717_BLDO2, + AXP717_BLDO3, + AXP717_BLDO4, + AXP717_CLDO1, + AXP717_CLDO2, + AXP717_CLDO3, + AXP717_CLDO4, + AXP717_CPUSLDO, + AXP717_REG_ID_MAX, +}; + +enum { AXP806_DCDCA = 0, AXP806_DCDCB, AXP806_DCDCC, @@ -732,6 +786,40 @@ enum axp313a_irqs { AXP313A_IRQ_PEK_RIS_EDGE, }; +enum axp717_irqs { + AXP717_IRQ_VBUS_FAULT, + AXP717_IRQ_VBUS_OVER_V, + AXP717_IRQ_BOOST_OVER_V, + AXP717_IRQ_GAUGE_NEW_SOC = 4, + AXP717_IRQ_SOC_DROP_LVL1 = 6, + AXP717_IRQ_SOC_DROP_LVL2, + AXP717_IRQ_PEK_RIS_EDGE, + AXP717_IRQ_PEK_FAL_EDGE, + AXP717_IRQ_PEK_LONG, + AXP717_IRQ_PEK_SHORT, + AXP717_IRQ_BATT_REMOVAL, + AXP717_IRQ_BATT_PLUGIN, + AXP717_IRQ_VBUS_REMOVAL, + AXP717_IRQ_VBUS_PLUGIN, + AXP717_IRQ_BATT_OVER_V, + AXP717_IRQ_CHARG_TIMER, + AXP717_IRQ_DIE_TEMP_HIGH, + AXP717_IRQ_CHARG, + AXP717_IRQ_CHARG_DONE, + AXP717_IRQ_BATT_OVER_CURR, + AXP717_IRQ_LDO_OVER_CURR, + AXP717_IRQ_WDOG_EXPIRE, + AXP717_IRQ_BATT_ACT_TEMP_LOW, + AXP717_IRQ_BATT_ACT_TEMP_HIGH, + AXP717_IRQ_BATT_CHG_TEMP_LOW, + AXP717_IRQ_BATT_CHG_TEMP_HIGH, + AXP717_IRQ_BATT_QUIT_TEMP_HIGH, + AXP717_IRQ_BC_USB_CHNG = 30, + AXP717_IRQ_BC_USB_DONE, + AXP717_IRQ_TYPEC_PLUGIN = 37, + AXP717_IRQ_TYPEC_REMOVE, +}; + enum axp803_irqs { AXP803_IRQ_ACIN_OVER_V = 1, AXP803_IRQ_ACIN_PLUGIN, diff --git a/include/linux/mfd/cs42l43.h b/include/linux/mfd/cs42l43.h index cf8263aab41b..2239d8585e78 
100644 --- a/include/linux/mfd/cs42l43.h +++ b/include/linux/mfd/cs42l43.h @@ -6,20 +6,21 @@ * Cirrus Logic International Semiconductor Ltd. */ +#ifndef CS42L43_CORE_EXT_H +#define CS42L43_CORE_EXT_H + #include <linux/completion.h> -#include <linux/device.h> -#include <linux/gpio/consumer.h> #include <linux/mutex.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> -#include <linux/soundwire/sdw.h> #include <linux/workqueue.h> -#ifndef CS42L43_CORE_EXT_H -#define CS42L43_CORE_EXT_H - #define CS42L43_N_SUPPLIES 3 +struct device; +struct gpio_desc; +struct sdw_slave; + enum cs42l43_irq_numbers { CS42L43_PLL_LOST_LOCK, CS42L43_PLL_READY, diff --git a/include/linux/mfd/da9055/pdata.h b/include/linux/mfd/da9055/pdata.h index d3f126990ad0..137a2b067512 100644 --- a/include/linux/mfd/da9055/pdata.h +++ b/include/linux/mfd/da9055/pdata.h @@ -7,7 +7,6 @@ #define DA9055_MAX_REGULATORS 8 struct da9055; -struct gpio_desc; enum gpio_select { NO_GPIO = 0, @@ -24,16 +23,6 @@ struct da9055_pdata { /* Enable RTC in RESET Mode */ bool reset_enable; /* - * GPI muxed pin to control - * regulator state A/B, 0 if not available. - */ - int *gpio_ren; - /* - * GPI muxed pin to control - * regulator set, 0 if not available. - */ - int *gpio_rsel; - /* * Regulator mode control bits value (GPI offset) that * controls the regulator state, 0 if not available. */ @@ -43,7 +32,5 @@ struct da9055_pdata { * controls the regulator set A/B, 0 if not available. */ enum gpio_select *reg_rsel; - /* GPIO descriptors to enable regulator, NULL if not available */ - struct gpio_desc **ena_gpiods; }; #endif /* __DA9055_PDATA_H */ diff --git a/include/linux/mfd/idtRC38xxx_reg.h b/include/linux/mfd/idtRC38xxx_reg.h new file mode 100644 index 000000000000..ec11872f51ad --- /dev/null +++ b/include/linux/mfd/idtRC38xxx_reg.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Register Map - Based on PolarBear_CSRs.RevA.xlsx (2023-04-21) + * + * Copyright (C) 2023 Integrated Device Technology, Inc., a Renesas Company. 
+ */ +#ifndef MFD_IDTRC38XXX_REG +#define MFD_IDTRC38XXX_REG + +/* GLOBAL */ +#define SOFT_RESET_CTRL (0x15) /* Specific to FC3W */ +#define MISC_CTRL (0x14) /* Specific to FC3A */ +#define APLL_REINIT BIT(1) +#define APLL_REINIT_VFC3A BIT(2) + +#define DEVICE_ID (0x2) +#define DEVICE_ID_MASK (0x1000) /* Bit 12 is 1 if FC3W and 0 if FC3A */ +#define DEVICE_ID_SHIFT (12) + +/* FOD */ +#define FOD_0 (0x300) +#define FOD_0_VFC3A (0x400) +#define FOD_1 (0x340) +#define FOD_1_VFC3A (0x440) +#define FOD_2 (0x380) +#define FOD_2_VFC3A (0x480) + +/* TDCAPLL */ +#define TDC_CTRL (0x44a) /* Specific to FC3W */ +#define TDC_ENABLE_CTRL (0x169) /* Specific to FC3A */ +#define TDC_DAC_CAL_CTRL (0x16a) /* Specific to FC3A */ +#define TDC_EN BIT(0) +#define TDC_DAC_RECAL_REQ BIT(1) +#define TDC_DAC_RECAL_REQ_VFC3A BIT(0) + +#define TDC_FB_DIV_INT_CNFG (0x442) +#define TDC_FB_DIV_INT_CNFG_VFC3A (0x162) +#define TDC_FB_DIV_INT_MASK GENMASK(7, 0) +#define TDC_REF_DIV_CNFG (0x443) +#define TDC_REF_DIV_CNFG_VFC3A (0x163) +#define TDC_REF_DIV_CONFIG_MASK GENMASK(2, 0) + +/* TIME SYNC CHANNEL */ +#define TIME_CLOCK_SRC (0xa01) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT (0xa00) /* Specific to FC3W */ +#define TIME_CLOCK_COUNT_MASK GENMASK(5, 0) + +#define SUB_SYNC_GEN_CNFG (0xa04) + +#define TOD_COUNTER_READ_REQ (0xa5f) +#define TOD_COUNTER_READ_REQ_VFC3A (0x6df) +#define TOD_SYNC_LOAD_VAL_CTRL (0xa10) +#define TOD_SYNC_LOAD_VAL_CTRL_VFC3A (0x690) +#define SYNC_COUNTER_MASK GENMASK_ULL(51, 0) +#define SUB_SYNC_COUNTER_MASK GENMASK(30, 0) +#define TOD_SYNC_LOAD_REQ_CTRL (0xa21) +#define TOD_SYNC_LOAD_REQ_CTRL_VFC3A (0x6a1) +#define SYNC_LOAD_ENABLE BIT(1) +#define SUB_SYNC_LOAD_ENABLE BIT(0) +#define SYNC_LOAD_REQ BIT(0) + +#define LPF_MODE_CNFG (0xa80) +#define LPF_MODE_CNFG_VFC3A (0x700) +enum lpf_mode { + LPF_DISABLED = 0, + LPF_WP = 1, + LPF_HOLDOVER = 2, + LPF_WF = 3, + LPF_INVALID = 4 +}; +#define LPF_CTRL (0xa98) +#define LPF_CTRL_VFC3A (0x718) +#define LPF_EN BIT(0) + +#define LPF_BW_CNFG (0xa81) +#define LPF_BW_SHIFT GENMASK(7, 3) +#define LPF_BW_MULT GENMASK(2, 0) +#define LPF_BW_SHIFT_DEFAULT (0xb) +#define LPF_BW_MULT_DEFAULT (0x0) +#define LPF_BW_SHIFT_1PPS (0x5) + +#define LPF_WR_PHASE_CTRL (0xaa8) +#define LPF_WR_PHASE_CTRL_VFC3A (0x728) +#define LPF_WR_FREQ_CTRL (0xab0) +#define LPF_WR_FREQ_CTRL_VFC3A (0x730) + +#define TIME_CLOCK_TDC_FANOUT_CNFG (0xB00) +#define TIME_SYNC_TO_TDC_EN BIT(0) +#define SIG1_MUX_SEL_MASK GENMASK(7, 4) +#define SIG2_MUX_SEL_MASK GENMASK(11, 8) +enum tdc_mux_sel { + REF0 = 0, + REF1 = 1, + REF2 = 2, + REF3 = 3, + REF_CLK5 = 4, + REF_CLK6 = 5, + DPLL_FB_TO_TDC = 6, + DPLL_FB_DIVIDED_TO_TDC = 7, + TIME_CLK_DIVIDED = 8, + TIME_SYNC = 9, +}; + +#define TIME_CLOCK_MEAS_CNFG (0xB04) +#define TDC_MEAS_MODE BIT(0) +enum tdc_meas_mode { + CONTINUOUS = 0, + ONE_SHOT = 1, + MEAS_MODE_INVALID = 2, +}; + +#define TIME_CLOCK_MEAS_DIV_CNFG (0xB08) +#define TIME_REF_DIV_MASK GENMASK(29, 24) + +#define TIME_CLOCK_MEAS_CTRL (0xB10) +#define TDC_MEAS_EN BIT(0) +#define TDC_MEAS_START BIT(1) + +#define TDC_FIFO_READ_REQ (0xB2F) +#define TDC_FIFO_READ (0xB30) +#define COARSE_MEAS_MASK GENMASK_ULL(39, 13) +#define FINE_MEAS_MASK GENMASK(12, 0) + +#define TDC_FIFO_CTRL (0xB12) +#define FIFO_CLEAR BIT(0) +#define TDC_FIFO_STS (0xB38) +#define FIFO_FULL BIT(1) +#define FIFO_EMPTY BIT(0) +#define TDC_FIFO_EVENT (0xB39) +#define FIFO_OVERRUN BIT(1) + +/* DPLL */ +#define MAX_REFERENCE_INDEX (3) +#define MAX_NUM_REF_PRIORITY (4) + +#define MAX_DPLL_INDEX (2) + +#define DPLL_STS (0x580) 
+#define DPLL_STS_VFC3A (0x571) +#define DPLL_STATE_STS_MASK (0x70) +#define DPLL_STATE_STS_SHIFT (4) +#define DPLL_REF_SEL_STS_MASK (0x6) +#define DPLL_REF_SEL_STS_SHIFT (1) + +#define DPLL_REF_PRIORITY_CNFG (0x502) +#define DPLL_REFX_PRIORITY_DISABLE_MASK (0xf) +#define DPLL_REF0_PRIORITY_ENABLE_AND_SET_MASK (0x31) +#define DPLL_REF1_PRIORITY_ENABLE_AND_SET_MASK (0xc2) +#define DPLL_REF2_PRIORITY_ENABLE_AND_SET_MASK (0x304) +#define DPLL_REF3_PRIORITY_ENABLE_AND_SET_MASK (0xc08) +#define DPLL_REF0_PRIORITY_SHIFT (4) +#define DPLL_REF1_PRIORITY_SHIFT (6) +#define DPLL_REF2_PRIORITY_SHIFT (8) +#define DPLL_REF3_PRIORITY_SHIFT (10) + +enum dpll_state { + DPLL_STATE_MIN = 0, + DPLL_STATE_FREERUN = DPLL_STATE_MIN, + DPLL_STATE_LOCKED = 1, + DPLL_STATE_HOLDOVER = 2, + DPLL_STATE_WRITE_FREQUENCY = 3, + DPLL_STATE_ACQUIRE = 4, + DPLL_STATE_HITLESS_SWITCH = 5, + DPLL_STATE_MAX = DPLL_STATE_HITLESS_SWITCH +}; + +/* REFMON */ +#define LOSMON_STS_0 (0x81e) +#define LOSMON_STS_0_VFC3A (0x18e) +#define LOSMON_STS_1 (0x82e) +#define LOSMON_STS_1_VFC3A (0x19e) +#define LOSMON_STS_2 (0x83e) +#define LOSMON_STS_2_VFC3A (0x1ae) +#define LOSMON_STS_3 (0x84e) +#define LOSMON_STS_3_VFC3A (0x1be) +#define LOS_STS_MASK (0x1) + +#define FREQMON_STS_0 (0x874) +#define FREQMON_STS_0_VFC3A (0x1d4) +#define FREQMON_STS_1 (0x894) +#define FREQMON_STS_1_VFC3A (0x1f4) +#define FREQMON_STS_2 (0x8b4) +#define FREQMON_STS_2_VFC3A (0x214) +#define FREQMON_STS_3 (0x8d4) +#define FREQMON_STS_3_VFC3A (0x234) +#define FREQ_FAIL_STS_SHIFT (31) + +/* Firmware interface */ +#define TIME_CLK_FREQ_ADDR (0xffa0) +#define XTAL_FREQ_ADDR (0xffa1) + +/* + * Return register address and field mask based on passed in firmware version + */ +#define IDTFC3_FW_REG(FW, VER, REG) (((FW) < (VER)) ? (REG) : (REG##_##VER)) +#define IDTFC3_FW_FIELD(FW, VER, FIELD) (((FW) < (VER)) ? 
(FIELD) : (FIELD##_##VER)) +enum fw_version { + V_DEFAULT = 0, + VFC3W = 1, + VFC3A = 2 +}; + +/* XTAL_FREQ_ADDR/TIME_CLK_FREQ_ADDR */ +enum { + FREQ_MIN = 0, + FREQ_25M = 1, + FREQ_49_152M = 2, + FREQ_50M = 3, + FREQ_100M = 4, + FREQ_125M = 5, + FREQ_250M = 6, + FREQ_MAX +}; + +struct idtfc3_hw_param { + u32 xtal_freq; + u32 time_clk_freq; +}; + +struct idtfc3_fwrc { + u8 hiaddr; + u8 loaddr; + u8 value; + u8 reserved; +} __packed; + +static inline void idtfc3_default_hw_param(struct idtfc3_hw_param *hw_param) +{ + hw_param->xtal_freq = 49152000; + hw_param->time_clk_freq = 25000000; +} + +static inline int idtfc3_set_hw_param(struct idtfc3_hw_param *hw_param, + u16 addr, u8 val) +{ + if (addr == XTAL_FREQ_ADDR) + switch (val) { + case FREQ_49_152M: + hw_param->xtal_freq = 49152000; + break; + case FREQ_50M: + hw_param->xtal_freq = 50000000; + break; + default: + return -EINVAL; + } + else if (addr == TIME_CLK_FREQ_ADDR) + switch (val) { + case FREQ_25M: + hw_param->time_clk_freq = 25000000; + break; + case FREQ_50M: + hw_param->time_clk_freq = 50000000; + break; + case FREQ_100M: + hw_param->time_clk_freq = 100000000; + break; + case FREQ_125M: + hw_param->time_clk_freq = 125000000; + break; + case FREQ_250M: + hw_param->time_clk_freq = 250000000; + break; + default: + return -EINVAL; + } + else + return -EFAULT; + + return 0; +} + +#endif diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index ee66c9751003..988f1cd90032 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -205,6 +205,7 @@ struct m10bmc_csr_map { unsigned int pr_reh_addr; unsigned int pr_magic; unsigned int rsu_update_counter; + unsigned int staging_size; }; /** diff --git a/include/linux/mfd/lp8788.h b/include/linux/mfd/lp8788.h index 3d5c480d58ea..fd17bec2a33e 100644 --- a/include/linux/mfd/lp8788.h +++ b/include/linux/mfd/lp8788.h @@ -10,9 +10,7 @@ #ifndef __MFD_LP8788_H__ #define __MFD_LP8788_H__ -#include <linux/gpio.h> #include <linux/irqdomain.h> -#include <linux/pwm.h> #include <linux/regmap.h> #define LP8788_DEV_BUCK "lp8788-buck" @@ -88,12 +86,6 @@ enum lp8788_charger_event { CHARGER_DETECTED, }; -enum lp8788_bl_ctrl_mode { - LP8788_BL_REGISTER_ONLY, - LP8788_BL_COMB_PWM_BASED, /* PWM + I2C, changed by PWM input */ - LP8788_BL_COMB_REGISTER_BASED, /* PWM + I2C, changed by I2C */ -}; - enum lp8788_bl_dim_mode { LP8788_DIM_EXPONENTIAL, LP8788_DIM_LINEAR, @@ -159,21 +151,17 @@ struct lp8788; /* * lp8788_buck1_dvs - * @gpio : gpio pin number for dvs control * @vsel : dvs selector for buck v1 register */ struct lp8788_buck1_dvs { - int gpio; enum lp8788_dvs_sel vsel; }; /* * lp8788_buck2_dvs - * @gpio : two gpio pin numbers are used for dvs * @vsel : dvs selector for buck v2 register */ struct lp8788_buck2_dvs { - int gpio[LP8788_NUM_BUCK2_DVS]; enum lp8788_dvs_sel vsel; }; @@ -207,31 +195,6 @@ struct lp8788_charger_platform_data { }; /* - * struct lp8788_backlight_platform_data - * @name : backlight driver name. (default: "lcd-backlight") - * @initial_brightness : initial value of backlight brightness - * @bl_mode : brightness control by pwm or lp8788 register - * @dim_mode : dimming mode selection - * @full_scale : full scale current setting - * @rise_time : brightness ramp up step time - * @fall_time : brightness ramp down step time - * @pwm_pol : pwm polarity setting when bl_mode is pwm based - * @period_ns : platform specific pwm period value. unit is nano. 
- Only valid when bl_mode is LP8788_BL_COMB_PWM_BASED - */ -struct lp8788_backlight_platform_data { - char *name; - int initial_brightness; - enum lp8788_bl_ctrl_mode bl_mode; - enum lp8788_bl_dim_mode dim_mode; - enum lp8788_bl_full_scale_current full_scale; - enum lp8788_bl_ramp_step rise_time; - enum lp8788_bl_ramp_step fall_time; - enum pwm_polarity pwm_pol; - unsigned int period_ns; -}; - -/* * struct lp8788_led_platform_data * @name : led driver name. (default: "keyboard-backlight") * @scale : current scale @@ -268,11 +231,10 @@ struct lp8788_vib_platform_data { * @buck_data : regulator initial data for buck * @dldo_data : regulator initial data for digital ldo * @aldo_data : regulator initial data for analog ldo - * @buck1_dvs : gpio configurations for buck1 dvs - * @buck2_dvs : gpio configurations for buck2 dvs + * @buck1_dvs : configurations for buck1 dvs + * @buck2_dvs : configurations for buck2 dvs * @chg_pdata : platform data for charger driver * @alarm_sel : rtc alarm selection (1 or 2) - * @bl_pdata : configurable data for backlight driver * @led_pdata : configurable data for led driver * @vib_pdata : configurable data for vibrator driver * @adc_pdata : iio map data for adc driver @@ -294,9 +256,6 @@ struct lp8788_platform_data { /* rtc alarm */ enum lp8788_alarm_sel alarm_sel; - /* backlight */ - struct lp8788_backlight_platform_data *bl_pdata; - /* current sinks */ struct lp8788_led_platform_data *led_pdata; struct lp8788_vib_platform_data *vib_pdata; diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 6193905abbb5..5c2cc1103437 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -178,7 +178,6 @@ struct max8997_platform_data { * */ bool ignore_gpiodvs_side_effect; - int buck125_gpios[3]; /* GPIO of [0]SET1, [1]SET2, [2]SET3 */ int buck125_default_idx; /* Default value of SET1, 2, 3 */ unsigned int buck1_voltage[8]; /* buckx_voltage in uV */ bool buck1_gpiodvs; diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index 79c020bd0c70..a054e55c8646 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -65,10 +65,7 @@ struct max8998_regulator_data { * be other than the preset values. * @buck1_voltage: BUCK1 DVS mode 1 voltage registers * @buck2_voltage: BUCK2 DVS mode 2 voltage registers - * @buck1_set1: BUCK1 gpio pin 1 to set output voltage - * @buck1_set2: BUCK1 gpio pin 2 to set output voltage * @buck1_default_idx: Default for BUCK1 gpio pin 1, 2 - * @buck2_set3: BUCK2 gpio pin to set output voltage * @buck2_default_idx: Default for BUCK2 gpio pin. 
* @wakeup: Allow to wake up from suspend * @rtc_delay: LP3974 RTC chip bug that requires delay after a register @@ -91,10 +88,7 @@ struct max8998_platform_data { bool buck_voltage_lock; int buck1_voltage[4]; int buck2_voltage[2]; - int buck1_set1; - int buck1_set2; int buck1_default_idx; - int buck2_set3; int buck2_default_idx; bool wakeup; bool rtc_delay; diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 78e167a92483..69cbea78b430 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -113,6 +113,148 @@ enum rk808_reg { #define RK808_INT_STS_MSK_REG2 0x4f #define RK808_IO_POL_REG 0x50 +/* RK816 */ +enum rk816_reg { + RK816_ID_DCDC1, + RK816_ID_DCDC2, + RK816_ID_DCDC3, + RK816_ID_DCDC4, + RK816_ID_LDO1, + RK816_ID_LDO2, + RK816_ID_LDO3, + RK816_ID_LDO4, + RK816_ID_LDO5, + RK816_ID_LDO6, + RK816_ID_BOOST, + RK816_ID_OTG_SW, +}; + +enum rk816_irqs { + /* INT_STS_REG1 */ + RK816_IRQ_PWRON_FALL, + RK816_IRQ_PWRON_RISE, + + /* INT_STS_REG2 */ + RK816_IRQ_VB_LOW, + RK816_IRQ_PWRON, + RK816_IRQ_PWRON_LP, + RK816_IRQ_HOTDIE, + RK816_IRQ_RTC_ALARM, + RK816_IRQ_RTC_PERIOD, + RK816_IRQ_USB_OV, + + /* INT_STS_REG3 */ + RK816_IRQ_PLUG_IN, + RK816_IRQ_PLUG_OUT, + RK816_IRQ_CHG_OK, + RK816_IRQ_CHG_TE, + RK816_IRQ_CHG_TS, + RK816_IRQ_CHG_CVTLIM, + RK816_IRQ_DISCHG_ILIM, +}; + +/* power channel registers */ +#define RK816_DCDC_EN_REG1 0x23 + +#define RK816_DCDC_EN_REG2 0x24 +#define RK816_BOOST_EN BIT(1) +#define RK816_OTG_EN BIT(2) +#define RK816_BOOST_EN_MSK BIT(5) +#define RK816_OTG_EN_MSK BIT(6) +#define RK816_BUCK_DVS_CONFIRM BIT(7) + +#define RK816_LDO_EN_REG1 0x27 + +#define RK816_LDO_EN_REG2 0x28 + +/* interrupt registers and irq definitions */ +#define RK816_INT_STS_REG1 0x49 +#define RK816_INT_STS_MSK_REG1 0x4a +#define RK816_INT_STS_PWRON_FALL BIT(5) +#define RK816_INT_STS_PWRON_RISE BIT(6) + +#define RK816_INT_STS_REG2 0x4c +#define RK816_INT_STS_MSK_REG2 0x4d +#define RK816_INT_STS_VB_LOW BIT(1) +#define RK816_INT_STS_PWRON BIT(2) +#define RK816_INT_STS_PWRON_LP BIT(3) +#define RK816_INT_STS_HOTDIE BIT(4) +#define RK816_INT_STS_RTC_ALARM BIT(5) +#define RK816_INT_STS_RTC_PERIOD BIT(6) +#define RK816_INT_STS_USB_OV BIT(7) + +#define RK816_INT_STS_REG3 0x4e +#define RK816_INT_STS_MSK_REG3 0x4f +#define RK816_INT_STS_PLUG_IN BIT(0) +#define RK816_INT_STS_PLUG_OUT BIT(1) +#define RK816_INT_STS_CHG_OK BIT(2) +#define RK816_INT_STS_CHG_TE BIT(3) +#define RK816_INT_STS_CHG_TS BIT(4) +#define RK816_INT_STS_CHG_CVTLIM BIT(6) +#define RK816_INT_STS_DISCHG_ILIM BIT(7) + +#define RK816_IRQ_STS_OFFSET(x) ((x) - RK816_INT_STS_REG1) +#define RK816_IRQ_MSK_OFFSET(x) ((x) - RK816_INT_STS_MSK_REG1) + +/* charger, boost and OTG registers */ +#define RK816_OTG_BUCK_LDO_CONFIG_REG 0x2a +#define RK816_CHRG_CONFIG_REG 0x2b +#define RK816_BOOST_ON_VESL_REG 0x54 +#define RK816_BOOST_SLP_VSEL_REG 0x55 +#define RK816_CHRG_BOOST_CONFIG_REG 0x9a +#define RK816_SUP_STS_REG 0xa0 +#define RK816_USB_CTRL_REG 0xa1 +#define RK816_CHRG_CTRL(x) (0xa3 + (x)) +#define RK816_BAT_CTRL_REG 0xa6 +#define RK816_BAT_HTS_TS_REG 0xa8 +#define RK816_BAT_LTS_TS_REG 0xa9 + +/* adc and fuel gauge registers */ +#define RK816_TS_CTRL_REG 0xac +#define RK816_ADC_CTRL_REG 0xad +#define RK816_GGCON_REG 0xb0 +#define RK816_GGSTS_REG 0xb1 +#define RK816_ZERO_CUR_ADC_REGH 0xb2 +#define RK816_ZERO_CUR_ADC_REGL 0xb3 +#define RK816_GASCNT_CAL_REG(x) (0xb7 - (x)) +#define RK816_GASCNT_REG(x) (0xbb - (x)) +#define RK816_BAT_CUR_AVG_REGH 0xbc +#define RK816_BAT_CUR_AVG_REGL 0xbd +#define RK816_TS_ADC_REGH 0xbe 
+#define RK816_TS_ADC_REGL 0xbf +#define RK816_USB_ADC_REGH 0xc0 +#define RK816_USB_ADC_REGL 0xc1 +#define RK816_BAT_OCV_REGH 0xc2 +#define RK816_BAT_OCV_REGL 0xc3 +#define RK816_BAT_VOL_REGH 0xc4 +#define RK816_BAT_VOL_REGL 0xc5 +#define RK816_RELAX_ENTRY_THRES_REGH 0xc6 +#define RK816_RELAX_ENTRY_THRES_REGL 0xc7 +#define RK816_RELAX_EXIT_THRES_REGH 0xc8 +#define RK816_RELAX_EXIT_THRES_REGL 0xc9 +#define RK816_RELAX_VOL1_REGH 0xca +#define RK816_RELAX_VOL1_REGL 0xcb +#define RK816_RELAX_VOL2_REGH 0xcc +#define RK816_RELAX_VOL2_REGL 0xcd +#define RK816_RELAX_CUR1_REGH 0xce +#define RK816_RELAX_CUR1_REGL 0xcf +#define RK816_RELAX_CUR2_REGH 0xd0 +#define RK816_RELAX_CUR2_REGL 0xd1 +#define RK816_CAL_OFFSET_REGH 0xd2 +#define RK816_CAL_OFFSET_REGL 0xd3 +#define RK816_NON_ACT_TIMER_CNT_REG 0xd4 +#define RK816_VCALIB0_REGH 0xd5 +#define RK816_VCALIB0_REGL 0xd6 +#define RK816_VCALIB1_REGH 0xd7 +#define RK816_VCALIB1_REGL 0xd8 +#define RK816_FCC_GASCNT_REG(x) (0xdc - (x)) +#define RK816_IOFFSET_REGH 0xdd +#define RK816_IOFFSET_REGL 0xde +#define RK816_SLEEP_CON_SAMP_CUR_REG 0xdf + +/* general purpose data registers 0xe0 ~ 0xf2 */ +#define RK816_DATA_REG(x) (0xe0 + (x)) + /* RK818 */ #define RK818_DCDC1 0 #define RK818_LDO1 4 @@ -791,6 +933,7 @@ enum rk806_dvs_mode { #define VOUT_LO_INT BIT(0) #define CLK32KOUT2_EN BIT(0) +#define TEMP105C 0x08 #define TEMP115C 0x0c #define TEMP_HOTDIE_MSK 0x0c #define SLP_SD_MSK (0x3 << 2) @@ -1191,6 +1334,7 @@ enum { RK806_ID = 0x8060, RK808_ID = 0x0000, RK809_ID = 0x8090, + RK816_ID = 0x8160, RK817_ID = 0x8170, RK818_ID = 0x8180, }; diff --git a/include/linux/mfd/rohm-bd71828.h b/include/linux/mfd/rohm-bd71828.h index 3b5f3a7db4bd..ce786c96404a 100644 --- a/include/linux/mfd/rohm-bd71828.h +++ b/include/linux/mfd/rohm-bd71828.h @@ -4,6 +4,7 @@ #ifndef __LINUX_MFD_BD71828_H__ #define __LINUX_MFD_BD71828_H__ +#include <linux/bits.h> #include <linux/mfd/rohm-generic.h> #include <linux/mfd/rohm-shared.h> @@ -41,7 +42,8 @@ enum { #define BD71828_REG_PS_CTRL_2 0x05 #define BD71828_REG_PS_CTRL_3 0x06 -//#define BD71828_REG_SWRESET 0x06 +#define BD71828_MASK_STATE_HBNT BIT(1) + #define BD71828_MASK_RUN_LVL_CTRL 0x30 /* Regulator control masks */ @@ -133,7 +135,6 @@ enum { #define BD71828_REG_LDO5_VOLT 0x43 #define BD71828_REG_LDO5_VOLT_OPT 0x42 #define BD71828_REG_LDO6_EN 0x44 -//#define BD71828_REG_LDO6_VOLT 0x4 #define BD71828_REG_LDO7_EN 0x45 #define BD71828_REG_LDO7_VOLT 0x46 diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index ca35af30745f..9eb17481b07f 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -41,6 +41,11 @@ #define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ #define TIM_SMCR_TS (BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */ #define TIM_DIER_UIE BIT(0) /* Update interrupt */ +#define TIM_DIER_CC1IE BIT(1) /* CC1 Interrupt Enable */ +#define TIM_DIER_CC2IE BIT(2) /* CC2 Interrupt Enable */ +#define TIM_DIER_CC3IE BIT(3) /* CC3 Interrupt Enable */ +#define TIM_DIER_CC4IE BIT(4) /* CC4 Interrupt Enable */ +#define TIM_DIER_CC_IE(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt enable */ #define TIM_DIER_UDE BIT(8) /* Update DMA request Enable */ #define TIM_DIER_CC1DE BIT(9) /* CC1 DMA request Enable */ #define TIM_DIER_CC2DE BIT(10) /* CC2 DMA request Enable */ @@ -49,6 +54,7 @@ #define TIM_DIER_COMDE BIT(13) /* COM DMA request Enable */ #define TIM_DIER_TDE BIT(14) /* Trigger DMA request Enable */ #define TIM_SR_UIF BIT(0) /* Update interrupt flag */ +#define 
TIM_SR_CC_IF(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt flag */ #define TIM_EGR_UG BIT(0) /* Update Generation */ #define TIM_CCMR_PE BIT(3) /* Channel Preload Enable */ #define TIM_CCMR_M1 (BIT(6) | BIT(5)) /* Channel PWM Mode 1 */ @@ -60,16 +66,23 @@ #define TIM_CCMR_CC1S_TI2 BIT(1) /* IC1/IC3 selects TI2/TI4 */ #define TIM_CCMR_CC2S_TI2 BIT(8) /* IC2/IC4 selects TI2/TI4 */ #define TIM_CCMR_CC2S_TI1 BIT(9) /* IC2/IC4 selects TI1/TI3 */ +#define TIM_CCMR_CC3S (BIT(0) | BIT(1)) /* Capture/compare 3 sel */ +#define TIM_CCMR_CC4S (BIT(8) | BIT(9)) /* Capture/compare 4 sel */ +#define TIM_CCMR_CC3S_TI3 BIT(0) /* IC3 selects TI3 */ +#define TIM_CCMR_CC4S_TI4 BIT(8) /* IC4 selects TI4 */ #define TIM_CCER_CC1E BIT(0) /* Capt/Comp 1 out Ena */ #define TIM_CCER_CC1P BIT(1) /* Capt/Comp 1 Polarity */ #define TIM_CCER_CC1NE BIT(2) /* Capt/Comp 1N out Ena */ #define TIM_CCER_CC1NP BIT(3) /* Capt/Comp 1N Polarity */ #define TIM_CCER_CC2E BIT(4) /* Capt/Comp 2 out Ena */ #define TIM_CCER_CC2P BIT(5) /* Capt/Comp 2 Polarity */ +#define TIM_CCER_CC2NP BIT(7) /* Capt/Comp 2N Polarity */ #define TIM_CCER_CC3E BIT(8) /* Capt/Comp 3 out Ena */ #define TIM_CCER_CC3P BIT(9) /* Capt/Comp 3 Polarity */ +#define TIM_CCER_CC3NP BIT(11) /* Capt/Comp 3N Polarity */ #define TIM_CCER_CC4E BIT(12) /* Capt/Comp 4 out Ena */ #define TIM_CCER_CC4P BIT(13) /* Capt/Comp 4 Polarity */ +#define TIM_CCER_CC4NP BIT(15) /* Capt/Comp 4N Polarity */ #define TIM_CCER_CCXE (BIT(0) | BIT(4) | BIT(8) | BIT(12)) #define TIM_BDTR_BKE(x) BIT(12 + (x) * 12) /* Break input enable */ #define TIM_BDTR_BKP(x) BIT(13 + (x) * 12) /* Break input polarity */ diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h index ea0ccf33a459..021f820f9d52 100644 --- a/include/linux/mfd/sun4i-gpadc.h +++ b/include/linux/mfd/sun4i-gpadc.h @@ -81,8 +81,8 @@ #define SUN4I_GPADC_TEMP_DATA 0x20 #define SUN4I_GPADC_DATA 0x24 -#define SUN4I_GPADC_IRQ_FIFO_DATA 0 -#define SUN4I_GPADC_IRQ_TEMP_DATA 1 +#define SUN4I_GPADC_IRQ_FIFO_DATA 1 +#define SUN4I_GPADC_IRQ_TEMP_DATA 2 /* 10s delay before suspending the IP */ #define SUN4I_GPADC_AUTOSUSPEND_DELAY 10000 diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index fecc2fa2a364..c315903f6dab 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -17,20 +17,17 @@ struct device_node; #ifdef CONFIG_MFD_SYSCON -extern struct regmap *device_node_to_regmap(struct device_node *np); -extern struct regmap *syscon_node_to_regmap(struct device_node *np); -extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s); -extern struct regmap *syscon_regmap_lookup_by_phandle( - struct device_node *np, - const char *property); -extern struct regmap *syscon_regmap_lookup_by_phandle_args( - struct device_node *np, - const char *property, - int arg_count, - unsigned int *out_args); -extern struct regmap *syscon_regmap_lookup_by_phandle_optional( - struct device_node *np, - const char *property); +struct regmap *device_node_to_regmap(struct device_node *np); +struct regmap *syscon_node_to_regmap(struct device_node *np); +struct regmap *syscon_regmap_lookup_by_compatible(const char *s); +struct regmap *syscon_regmap_lookup_by_phandle(struct device_node *np, + const char *property); +struct regmap *syscon_regmap_lookup_by_phandle_args(struct device_node *np, + const char *property, + int arg_count, + unsigned int *out_args); +struct regmap *syscon_regmap_lookup_by_phandle_optional(struct device_node *np, + const char *property); #else static inline struct regmap 
*device_node_to_regmap(struct device_node *np) { diff --git a/include/linux/mfd/tps6594.h b/include/linux/mfd/tps6594.h index 3f7c5e23cd4c..16543fd4d83e 100644 --- a/include/linux/mfd/tps6594.h +++ b/include/linux/mfd/tps6594.h @@ -18,12 +18,13 @@ enum pmic_id { TPS6594, TPS6593, LP8764, + TPS65224, }; /* Macro to get page index from register address */ #define TPS6594_REG_TO_PAGE(reg) ((reg) >> 8) -/* Registers for page 0 of TPS6594 */ +/* Registers for page 0 */ #define TPS6594_REG_DEV_REV 0x01 #define TPS6594_REG_NVM_CODE_1 0x02 @@ -56,9 +57,6 @@ enum pmic_id { #define TPS6594_REG_GPIOX_OUT(gpio_inst) (TPS6594_REG_GPIO_OUT_1 + (gpio_inst) / 8) #define TPS6594_REG_GPIOX_IN(gpio_inst) (TPS6594_REG_GPIO_IN_1 + (gpio_inst) / 8) -#define TPS6594_REG_GPIO_IN_1 0x3f -#define TPS6594_REG_GPIO_IN_2 0x40 - #define TPS6594_REG_RAIL_SEL_1 0x41 #define TPS6594_REG_RAIL_SEL_2 0x42 #define TPS6594_REG_RAIL_SEL_3 0x43 @@ -70,13 +68,15 @@ enum pmic_id { #define TPS6594_REG_FSM_TRIG_MASK_3 0x48 #define TPS6594_REG_MASK_BUCK1_2 0x49 +#define TPS65224_REG_MASK_BUCKS 0x49 #define TPS6594_REG_MASK_BUCK3_4 0x4a #define TPS6594_REG_MASK_BUCK5 0x4b #define TPS6594_REG_MASK_LDO1_2 0x4c +#define TPS65224_REG_MASK_LDOS 0x4c #define TPS6594_REG_MASK_LDO3_4 0x4d #define TPS6594_REG_MASK_VMON 0x4e -#define TPS6594_REG_MASK_GPIO1_8_FALL 0x4f -#define TPS6594_REG_MASK_GPIO1_8_RISE 0x50 +#define TPS6594_REG_MASK_GPIO_FALL 0x4f +#define TPS6594_REG_MASK_GPIO_RISE 0x50 #define TPS6594_REG_MASK_GPIO9_11 0x51 #define TPS6594_REG_MASK_STARTUP 0x52 #define TPS6594_REG_MASK_MISC 0x53 @@ -174,6 +174,10 @@ enum pmic_id { #define TPS6594_REG_REGISTER_LOCK 0xa1 +#define TPS65224_REG_SRAM_ACCESS_1 0xa2 +#define TPS65224_REG_SRAM_ACCESS_2 0xa3 +#define TPS65224_REG_SRAM_ADDR_CTRL 0xa4 +#define TPS65224_REG_RECOV_CNT_PFSM_INCR 0xa5 #define TPS6594_REG_MANUFACTURING_VER 0xa6 #define TPS6594_REG_CUSTOMER_NVM_ID_REG 0xa7 @@ -182,6 +186,9 @@ enum pmic_id { #define TPS6594_REG_SOFT_REBOOT_REG 0xab +#define TPS65224_REG_ADC_CTRL 0xac +#define TPS65224_REG_ADC_RESULT_REG_1 0xad +#define TPS65224_REG_ADC_RESULT_REG_2 0xae #define TPS6594_REG_RTC_SECONDS 0xb5 #define TPS6594_REG_RTC_MINUTES 0xb6 #define TPS6594_REG_RTC_HOURS 0xb7 @@ -199,6 +206,7 @@ enum pmic_id { #define TPS6594_REG_RTC_CTRL_1 0xc2 #define TPS6594_REG_RTC_CTRL_2 0xc3 +#define TPS65224_REG_STARTUP_CTRL 0xc3 #define TPS6594_REG_RTC_STATUS 0xc4 #define TPS6594_REG_RTC_INTERRUPTS 0xc5 #define TPS6594_REG_RTC_COMP_LSB 0xc6 @@ -214,13 +222,17 @@ enum pmic_id { #define TPS6594_REG_PFSM_DELAY_REG_2 0xce #define TPS6594_REG_PFSM_DELAY_REG_3 0xcf #define TPS6594_REG_PFSM_DELAY_REG_4 0xd0 +#define TPS65224_REG_ADC_GAIN_COMP_REG 0xd0 +#define TPS65224_REG_CRC_CALC_CONTROL 0xef +#define TPS65224_REG_REGMAP_USER_CRC_LOW 0xf0 +#define TPS65224_REG_REGMAP_USER_CRC_HIGH 0xf1 -/* Registers for page 1 of TPS6594 */ +/* Registers for page 1 */ #define TPS6594_REG_SERIAL_IF_CONFIG 0x11a #define TPS6594_REG_I2C1_ID 0x122 #define TPS6594_REG_I2C2_ID 0x123 -/* Registers for page 4 of TPS6594 */ +/* Registers for page 4 */ #define TPS6594_REG_WD_ANSWER_REG 0x401 #define TPS6594_REG_WD_QUESTION_ANSW_CNT 0x402 #define TPS6594_REG_WD_WIN1_CFG 0x403 @@ -241,16 +253,26 @@ enum pmic_id { #define TPS6594_BIT_BUCK_PLDN BIT(5) #define TPS6594_BIT_BUCK_RV_SEL BIT(7) -/* BUCKX_CONF register field definition */ +/* TPS6594 BUCKX_CONF register field definition */ #define TPS6594_MASK_BUCK_SLEW_RATE GENMASK(2, 0) #define TPS6594_MASK_BUCK_ILIM GENMASK(5, 3) -/* BUCKX_PG_WINDOW register field definition */ +/* 
TPS65224 BUCKX_CONF register field definition */ +#define TPS65224_MASK_BUCK_SLEW_RATE GENMASK(1, 0) + +/* TPS6594 BUCKX_PG_WINDOW register field definition */ #define TPS6594_MASK_BUCK_OV_THR GENMASK(2, 0) #define TPS6594_MASK_BUCK_UV_THR GENMASK(5, 3) -/* BUCKX VSET */ -#define TPS6594_MASK_BUCKS_VSET GENMASK(7, 0) +/* TPS65224 BUCKX_PG_WINDOW register field definition */ +#define TPS65224_MASK_BUCK_VMON_THR GENMASK(1, 0) + +/* TPS6594 BUCKX_VOUT register field definition */ +#define TPS6594_MASK_BUCKS_VSET GENMASK(7, 0) + +/* TPS65224 BUCKX_VOUT register field definition */ +#define TPS65224_MASK_BUCK1_VSET GENMASK(7, 0) +#define TPS65224_MASK_BUCKS_VSET GENMASK(6, 0) /* LDOX_CTRL register field definition */ #define TPS6594_BIT_LDO_EN BIT(0) @@ -258,6 +280,7 @@ enum pmic_id { #define TPS6594_BIT_LDO_VMON_EN BIT(4) #define TPS6594_MASK_LDO_PLDN GENMASK(6, 5) #define TPS6594_BIT_LDO_RV_SEL BIT(7) +#define TPS65224_BIT_LDO_DISCHARGE_EN BIT(5) /* LDORTC_CTRL register field definition */ #define TPS6594_BIT_LDORTC_DIS BIT(0) @@ -271,6 +294,9 @@ enum pmic_id { #define TPS6594_MASK_LDO_OV_THR GENMASK(2, 0) #define TPS6594_MASK_LDO_UV_THR GENMASK(5, 3) +/* LDOX_PG_WINDOW register field definition */ +#define TPS65224_MASK_LDO_VMON_THR GENMASK(1, 0) + /* VCCA_VMON_CTRL register field definition */ #define TPS6594_BIT_VMON_EN BIT(0) #define TPS6594_BIT_VMON1_EN BIT(1) @@ -278,10 +304,12 @@ enum pmic_id { #define TPS6594_BIT_VMON2_EN BIT(3) #define TPS6594_BIT_VMON2_RV_SEL BIT(4) #define TPS6594_BIT_VMON_DEGLITCH_SEL BIT(5) +#define TPS65224_BIT_VMON_DEGLITCH_SEL GENMASK(7, 5) /* VCCA_PG_WINDOW register field definition */ #define TPS6594_MASK_VCCA_OV_THR GENMASK(2, 0) #define TPS6594_MASK_VCCA_UV_THR GENMASK(5, 3) +#define TPS65224_MASK_VCCA_VMON_THR GENMASK(1, 0) #define TPS6594_BIT_VCCA_PG_SET BIT(6) /* VMONX_PG_WINDOW register field definition */ @@ -289,6 +317,9 @@ enum pmic_id { #define TPS6594_MASK_VMONX_UV_THR GENMASK(5, 3) #define TPS6594_BIT_VMONX_RANGE BIT(6) +/* VMONX_PG_WINDOW register field definition */ +#define TPS65224_MASK_VMONX_THR GENMASK(1, 0) + /* GPIOX_CONF register field definition */ #define TPS6594_BIT_GPIO_DIR BIT(0) #define TPS6594_BIT_GPIO_OD BIT(1) @@ -296,6 +327,8 @@ enum pmic_id { #define TPS6594_BIT_GPIO_PU_PD_EN BIT(3) #define TPS6594_BIT_GPIO_DEGLITCH_EN BIT(4) #define TPS6594_MASK_GPIO_SEL GENMASK(7, 5) +#define TPS65224_MASK_GPIO_SEL GENMASK(6, 5) +#define TPS65224_MASK_GPIO_SEL_GPIO6 GENMASK(7, 5) /* NPWRON_CONF register field definition */ #define TPS6594_BIT_NRSTOUT_OD BIT(0) @@ -305,6 +338,12 @@ enum pmic_id { #define TPS6594_BIT_ENABLE_POL BIT(5) #define TPS6594_MASK_NPWRON_SEL GENMASK(7, 6) +/* POWER_ON_CONFIG register field definition */ +#define TPS65224_BIT_NINT_ENDRV_PU_SEL BIT(0) +#define TPS65224_BIT_NINT_ENDRV_SEL BIT(1) +#define TPS65224_BIT_EN_PB_DEGL BIT(5) +#define TPS65224_MASK_EN_PB_VSENSE_CONFIG GENMASK(7, 6) + /* GPIO_OUT_X register field definition */ #define TPS6594_BIT_GPIOX_OUT(gpio_inst) BIT((gpio_inst) % 8) @@ -312,6 +351,12 @@ enum pmic_id { #define TPS6594_BIT_GPIOX_IN(gpio_inst) BIT((gpio_inst) % 8) #define TPS6594_BIT_NPWRON_IN BIT(3) +/* GPIO_OUT_X register field definition */ +#define TPS65224_BIT_GPIOX_OUT(gpio_inst) BIT((gpio_inst)) + +/* GPIO_IN_X register field definition */ +#define TPS65224_BIT_GPIOX_IN(gpio_inst) BIT((gpio_inst)) + /* RAIL_SEL_1 register field definition */ #define TPS6594_MASK_BUCK1_GRP_SEL GENMASK(1, 0) #define TPS6594_MASK_BUCK2_GRP_SEL GENMASK(3, 2) @@ -343,6 +388,9 @@ enum pmic_id { #define 
TPS6594_BIT_GPIOX_FSM_MASK(gpio_inst) BIT(((gpio_inst) << 1) % 8) #define TPS6594_BIT_GPIOX_FSM_MASK_POL(gpio_inst) BIT(((gpio_inst) << 1) % 8 + 1) +#define TPS65224_BIT_GPIOX_FSM_MASK(gpio_inst) BIT(((gpio_inst) << 1) % 6) +#define TPS65224_BIT_GPIOX_FSM_MASK_POL(gpio_inst) BIT(((gpio_inst) << 1) % 6 + 1) + /* MASK_BUCKX register field definition */ #define TPS6594_BIT_BUCKX_OV_MASK(buck_inst) BIT(((buck_inst) << 2) % 8) #define TPS6594_BIT_BUCKX_UV_MASK(buck_inst) BIT(((buck_inst) << 2) % 8 + 1) @@ -361,22 +409,46 @@ enum pmic_id { #define TPS6594_BIT_VMON2_OV_MASK BIT(5) #define TPS6594_BIT_VMON2_UV_MASK BIT(6) +/* MASK_BUCK Register field definition */ +#define TPS65224_BIT_BUCK1_UVOV_MASK BIT(0) +#define TPS65224_BIT_BUCK2_UVOV_MASK BIT(1) +#define TPS65224_BIT_BUCK3_UVOV_MASK BIT(2) +#define TPS65224_BIT_BUCK4_UVOV_MASK BIT(4) + +/* MASK_LDO_VMON register field definition */ +#define TPS65224_BIT_LDO1_UVOV_MASK BIT(0) +#define TPS65224_BIT_LDO2_UVOV_MASK BIT(1) +#define TPS65224_BIT_LDO3_UVOV_MASK BIT(2) +#define TPS65224_BIT_VCCA_UVOV_MASK BIT(4) +#define TPS65224_BIT_VMON1_UVOV_MASK BIT(5) +#define TPS65224_BIT_VMON2_UVOV_MASK BIT(6) + /* MASK_GPIOX register field definition */ #define TPS6594_BIT_GPIOX_FALL_MASK(gpio_inst) BIT((gpio_inst) < 8 ? \ (gpio_inst) : (gpio_inst) % 8) #define TPS6594_BIT_GPIOX_RISE_MASK(gpio_inst) BIT((gpio_inst) < 8 ? \ (gpio_inst) : (gpio_inst) % 8 + 3) +/* MASK_GPIOX register field definition */ +#define TPS65224_BIT_GPIOX_FALL_MASK(gpio_inst) BIT((gpio_inst)) +#define TPS65224_BIT_GPIOX_RISE_MASK(gpio_inst) BIT((gpio_inst)) /* MASK_STARTUP register field definition */ #define TPS6594_BIT_NPWRON_START_MASK BIT(0) #define TPS6594_BIT_ENABLE_MASK BIT(1) #define TPS6594_BIT_FSD_MASK BIT(4) #define TPS6594_BIT_SOFT_REBOOT_MASK BIT(5) +#define TPS65224_BIT_VSENSE_MASK BIT(0) +#define TPS65224_BIT_PB_SHORT_MASK BIT(2) /* MASK_MISC register field definition */ #define TPS6594_BIT_BIST_PASS_MASK BIT(0) #define TPS6594_BIT_EXT_CLK_MASK BIT(1) +#define TPS65224_BIT_REG_UNLOCK_MASK BIT(2) #define TPS6594_BIT_TWARN_MASK BIT(3) +#define TPS65224_BIT_PB_LONG_MASK BIT(4) +#define TPS65224_BIT_PB_FALL_MASK BIT(5) +#define TPS65224_BIT_PB_RISE_MASK BIT(6) +#define TPS65224_BIT_ADC_CONV_READY_MASK BIT(7) /* MASK_MODERATE_ERR register field definition */ #define TPS6594_BIT_BIST_FAIL_MASK BIT(1) @@ -391,6 +463,8 @@ enum pmic_id { #define TPS6594_BIT_ORD_SHUTDOWN_MASK BIT(1) #define TPS6594_BIT_MCU_PWR_ERR_MASK BIT(2) #define TPS6594_BIT_SOC_PWR_ERR_MASK BIT(3) +#define TPS65224_BIT_COMM_ERR_MASK BIT(4) +#define TPS65224_BIT_I2C2_ERR_MASK BIT(5) /* MASK_COMM_ERR register field definition */ #define TPS6594_BIT_COMM_FRM_ERR_MASK BIT(0) @@ -426,6 +500,12 @@ enum pmic_id { #define TPS6594_BIT_BUCK3_4_INT BIT(1) #define TPS6594_BIT_BUCK5_INT BIT(2) +/* INT_BUCK register field definition */ +#define TPS65224_BIT_BUCK1_UVOV_INT BIT(0) +#define TPS65224_BIT_BUCK2_UVOV_INT BIT(1) +#define TPS65224_BIT_BUCK3_UVOV_INT BIT(2) +#define TPS65224_BIT_BUCK4_UVOV_INT BIT(3) + /* INT_BUCKX register field definition */ #define TPS6594_BIT_BUCKX_OV_INT(buck_inst) BIT(((buck_inst) << 2) % 8) #define TPS6594_BIT_BUCKX_UV_INT(buck_inst) BIT(((buck_inst) << 2) % 8 + 1) @@ -437,6 +517,14 @@ enum pmic_id { #define TPS6594_BIT_LDO3_4_INT BIT(1) #define TPS6594_BIT_VCCA_INT BIT(4) +/* INT_LDO_VMON register field definition */ +#define TPS65224_BIT_LDO1_UVOV_INT BIT(0) +#define TPS65224_BIT_LDO2_UVOV_INT BIT(1) +#define TPS65224_BIT_LDO3_UVOV_INT BIT(2) +#define TPS65224_BIT_VCCA_UVOV_INT BIT(4) 
+#define TPS65224_BIT_VMON1_UVOV_INT BIT(5) +#define TPS65224_BIT_VMON2_UVOV_INT BIT(6) + /* INT_LDOX register field definition */ #define TPS6594_BIT_LDOX_OV_INT(ldo_inst) BIT(((ldo_inst) << 2) % 8) #define TPS6594_BIT_LDOX_UV_INT(ldo_inst) BIT(((ldo_inst) << 2) % 8 + 1) @@ -462,17 +550,32 @@ enum pmic_id { /* INT_GPIOX register field definition */ #define TPS6594_BIT_GPIOX_INT(gpio_inst) BIT(gpio_inst) +/* INT_GPIO register field definition */ +#define TPS65224_BIT_GPIO1_INT BIT(0) +#define TPS65224_BIT_GPIO2_INT BIT(1) +#define TPS65224_BIT_GPIO3_INT BIT(2) +#define TPS65224_BIT_GPIO4_INT BIT(3) +#define TPS65224_BIT_GPIO5_INT BIT(4) +#define TPS65224_BIT_GPIO6_INT BIT(5) + /* INT_STARTUP register field definition */ #define TPS6594_BIT_NPWRON_START_INT BIT(0) +#define TPS65224_BIT_VSENSE_INT BIT(0) #define TPS6594_BIT_ENABLE_INT BIT(1) #define TPS6594_BIT_RTC_INT BIT(2) +#define TPS65224_BIT_PB_SHORT_INT BIT(2) #define TPS6594_BIT_FSD_INT BIT(4) #define TPS6594_BIT_SOFT_REBOOT_INT BIT(5) /* INT_MISC register field definition */ #define TPS6594_BIT_BIST_PASS_INT BIT(0) #define TPS6594_BIT_EXT_CLK_INT BIT(1) +#define TPS65224_BIT_REG_UNLOCK_INT BIT(2) #define TPS6594_BIT_TWARN_INT BIT(3) +#define TPS65224_BIT_PB_LONG_INT BIT(4) +#define TPS65224_BIT_PB_FALL_INT BIT(5) +#define TPS65224_BIT_PB_RISE_INT BIT(6) +#define TPS65224_BIT_ADC_CONV_READY_INT BIT(7) /* INT_MODERATE_ERR register field definition */ #define TPS6594_BIT_TSD_ORD_INT BIT(0) @@ -488,6 +591,7 @@ enum pmic_id { #define TPS6594_BIT_TSD_IMM_INT BIT(0) #define TPS6594_BIT_VCCA_OVP_INT BIT(1) #define TPS6594_BIT_PFSM_ERR_INT BIT(2) +#define TPS65224_BIT_BG_XMON_INT BIT(3) /* INT_FSM_ERR register field definition */ #define TPS6594_BIT_IMM_SHUTDOWN_INT BIT(0) @@ -496,6 +600,7 @@ enum pmic_id { #define TPS6594_BIT_SOC_PWR_ERR_INT BIT(3) #define TPS6594_BIT_COMM_ERR_INT BIT(4) #define TPS6594_BIT_READBACK_ERR_INT BIT(5) +#define TPS65224_BIT_I2C2_ERR_INT BIT(5) #define TPS6594_BIT_ESM_INT BIT(6) #define TPS6594_BIT_WD_INT BIT(7) @@ -536,8 +641,18 @@ enum pmic_id { #define TPS6594_BIT_VMON2_OV_STAT BIT(5) #define TPS6594_BIT_VMON2_UV_STAT BIT(6) +/* STAT_LDO_VMON register field definition */ +#define TPS65224_BIT_LDO1_UVOV_STAT BIT(0) +#define TPS65224_BIT_LDO2_UVOV_STAT BIT(1) +#define TPS65224_BIT_LDO3_UVOV_STAT BIT(2) +#define TPS65224_BIT_VCCA_UVOV_STAT BIT(4) +#define TPS65224_BIT_VMON1_UVOV_STAT BIT(5) +#define TPS65224_BIT_VMON2_UVOV_STAT BIT(6) + /* STAT_STARTUP register field definition */ +#define TPS65224_BIT_VSENSE_STAT BIT(0) #define TPS6594_BIT_ENABLE_STAT BIT(1) +#define TPS65224_BIT_PB_LEVEL_STAT BIT(2) /* STAT_MISC register field definition */ #define TPS6594_BIT_EXT_CLK_STAT BIT(1) @@ -549,6 +664,7 @@ enum pmic_id { /* STAT_SEVERE_ERR register field definition */ #define TPS6594_BIT_TSD_IMM_STAT BIT(0) #define TPS6594_BIT_VCCA_OVP_STAT BIT(1) +#define TPS65224_BIT_BG_XMON_STAT BIT(3) /* STAT_READBACK_ERR register field definition */ #define TPS6594_BIT_EN_DRV_READBACK_STAT BIT(0) @@ -597,6 +713,8 @@ enum pmic_id { #define TPS6594_BIT_BB_CHARGER_EN BIT(0) #define TPS6594_BIT_BB_ICHR BIT(1) #define TPS6594_MASK_BB_VEOC GENMASK(3, 2) +#define TPS65224_BIT_I2C1_SPI_CRC_EN BIT(4) +#define TPS65224_BIT_I2C2_CRC_EN BIT(5) #define TPS6594_BB_EOC_RDY BIT(7) /* ENABLE_DRV_REG register field definition */ @@ -617,6 +735,7 @@ enum pmic_id { #define TPS6594_BIT_NRSTOUT_SOC_IN BIT(2) #define TPS6594_BIT_FORCE_EN_DRV_LOW BIT(3) #define TPS6594_BIT_SPMI_LPM_EN BIT(4) +#define TPS65224_BIT_TSD_DISABLE BIT(5) /* RECOV_CNT_REG_1 
register field definition */ #define TPS6594_MASK_RECOV_CNT GENMASK(3, 0) @@ -671,15 +790,27 @@ enum pmic_id { /* ESM_SOC_START_REG register field definition */ #define TPS6594_BIT_ESM_SOC_START BIT(0) +/* ESM_MCU_START_REG register field definition */ +#define TPS65224_BIT_ESM_MCU_START BIT(0) + /* ESM_SOC_MODE_CFG register field definition */ #define TPS6594_MASK_ESM_SOC_ERR_CNT_TH GENMASK(3, 0) #define TPS6594_BIT_ESM_SOC_ENDRV BIT(5) #define TPS6594_BIT_ESM_SOC_EN BIT(6) #define TPS6594_BIT_ESM_SOC_MODE BIT(7) +/* ESM_MCU_MODE_CFG register field definition */ +#define TPS65224_MASK_ESM_MCU_ERR_CNT_TH GENMASK(3, 0) +#define TPS65224_BIT_ESM_MCU_ENDRV BIT(5) +#define TPS65224_BIT_ESM_MCU_EN BIT(6) +#define TPS65224_BIT_ESM_MCU_MODE BIT(7) + /* ESM_SOC_ERR_CNT_REG register field definition */ #define TPS6594_MASK_ESM_SOC_ERR_CNT GENMASK(4, 0) +/* ESM_MCU_ERR_CNT_REG register field definition */ +#define TPS6594_MASK_ESM_MCU_ERR_CNT GENMASK(4, 0) + /* REGISTER_LOCK register field definition */ #define TPS6594_BIT_REGISTER_LOCK_STATUS BIT(0) @@ -687,6 +818,29 @@ enum pmic_id { #define TPS6594_MASK_VMON1_SLEW_RATE GENMASK(2, 0) #define TPS6594_MASK_VMON2_SLEW_RATE GENMASK(5, 3) +/* SRAM_ACCESS_1 Register field definition */ +#define TPS65224_MASk_SRAM_UNLOCK_SEQ GENMASK(7, 0) + +/* SRAM_ACCESS_2 Register field definition */ +#define TPS65224_BIT_SRAM_WRITE_MODE BIT(0) +#define TPS65224_BIT_OTP_PROG_USER BIT(1) +#define TPS65224_BIT_OTP_PROG_PFSM BIT(2) +#define TPS65224_BIT_OTP_PROG_STATUS BIT(3) +#define TPS65224_BIT_SRAM_UNLOCKED BIT(6) +#define TPS65224_USER_PROG_ALLOWED BIT(7) + +/* SRAM_ADDR_CTRL Register field definition */ +#define TPS65224_MASk_SRAM_SEL GENMASK(1, 0) + +/* RECOV_CNT_PFSM_INCR Register field definition */ +#define TPS65224_BIT_INCREMENT_RECOV_CNT BIT(0) + +/* MANUFACTURING_VER Register field definition */ +#define TPS65224_MASK_SILICON_REV GENMASK(7, 0) + +/* CUSTOMER_NVM_ID_REG Register field definition */ +#define TPS65224_MASK_CUSTOMER_NVM_ID GENMASK(7, 0) + /* SOFT_REBOOT_REG register field definition */ #define TPS6594_BIT_SOFT_REBOOT BIT(0) @@ -755,14 +909,83 @@ enum pmic_id { #define TPS6594_BIT_I2C2_CRC_EN BIT(2) #define TPS6594_MASK_T_CRC GENMASK(7, 3) +/* ADC_CTRL Register field definition */ +#define TPS65224_BIT_ADC_START BIT(0) +#define TPS65224_BIT_ADC_CONT_CONV BIT(1) +#define TPS65224_BIT_ADC_THERMAL_SEL BIT(2) +#define TPS65224_BIT_ADC_RDIV_EN BIT(3) +#define TPS65224_BIT_ADC_STATUS BIT(7) + +/* ADC_RESULT_REG_1 Register field definition */ +#define TPS65224_MASK_ADC_RESULT_11_4 GENMASK(7, 0) + +/* ADC_RESULT_REG_2 Register field definition */ +#define TPS65224_MASK_ADC_RESULT_3_0 GENMASK(7, 4) + +/* STARTUP_CTRL Register field definition */ +#define TPS65224_MASK_STARTUP_DEST GENMASK(6, 5) +#define TPS65224_BIT_FIRST_STARTUP_DONE BIT(7) + +/* SCRATCH_PAD_REG_1 Register field definition */ +#define TPS6594_MASK_SCRATCH_PAD_1 GENMASK(7, 0) + +/* SCRATCH_PAD_REG_2 Register field definition */ +#define TPS6594_MASK_SCRATCH_PAD_2 GENMASK(7, 0) + +/* SCRATCH_PAD_REG_3 Register field definition */ +#define TPS6594_MASK_SCRATCH_PAD_3 GENMASK(7, 0) + +/* SCRATCH_PAD_REG_4 Register field definition */ +#define TPS6594_MASK_SCRATCH_PAD_4 GENMASK(7, 0) + +/* PFSM_DELAY_REG_1 Register field definition */ +#define TPS6594_MASK_PFSM_DELAY1 GENMASK(7, 0) + +/* PFSM_DELAY_REG_2 Register field definition */ +#define TPS6594_MASK_PFSM_DELAY2 GENMASK(7, 0) + +/* PFSM_DELAY_REG_3 Register field definition */ +#define TPS6594_MASK_PFSM_DELAY3 GENMASK(7, 0) + +/* 
PFSM_DELAY_REG_4 Register field definition */ +#define TPS6594_MASK_PFSM_DELAY4 GENMASK(7, 0) + +/* CRC_CALC_CONTROL Register field definition */ +#define TPS65224_BIT_RUN_CRC_BIST BIT(0) +#define TPS65224_BIT_RUN_CRC_UPDATE BIT(1) + +/* ADC_GAIN_COMP_REG Register field definition */ +#define TPS65224_MASK_ADC_GAIN_COMP GENMASK(7, 0) + +/* REGMAP_USER_CRC_LOW Register field definition */ +#define TPS65224_MASK_REGMAP_USER_CRC16_LOW GENMASK(7, 0) + +/* REGMAP_USER_CRC_HIGH Register field definition */ +#define TPS65224_MASK_REGMAP_USER_CRC16_HIGH GENMASK(7, 0) + +/* WD_ANSWER_REG Register field definition */ +#define TPS6594_MASK_WD_ANSWER GENMASK(7, 0) + /* WD_QUESTION_ANSW_CNT register field definition */ #define TPS6594_MASK_WD_QUESTION GENMASK(3, 0) #define TPS6594_MASK_WD_ANSW_CNT GENMASK(5, 4) +#define TPS65224_BIT_INT_TOP_STATUS BIT(7) + +/* WD WIN1_CFG register field definition */ +#define TPS6594_MASK_WD_WIN1_CFG GENMASK(6, 0) + +/* WD WIN2_CFG register field definition */ +#define TPS6594_MASK_WD_WIN2_CFG GENMASK(6, 0) + +/* WD LongWin register field definition */ +#define TPS6594_MASK_WD_LONGWIN_CFG GENMASK(7, 0) /* WD_MODE_REG register field definition */ #define TPS6594_BIT_WD_RETURN_LONGWIN BIT(0) #define TPS6594_BIT_WD_MODE_SELECT BIT(1) #define TPS6594_BIT_WD_PWRHOLD BIT(2) +#define TPS65224_BIT_WD_ENDRV_SEL BIT(6) +#define TPS65224_BIT_WD_CNT_SEL BIT(7) /* WD_QA_CFG register field definition */ #define TPS6594_MASK_WD_QUESTION_SEED GENMASK(3, 0) @@ -993,6 +1216,106 @@ enum tps6594_irqs { #define TPS6594_IRQ_NAME_ALARM "alarm" #define TPS6594_IRQ_NAME_POWERUP "powerup" +/* IRQs */ +enum tps65224_irqs { + /* INT_BUCK register */ + TPS65224_IRQ_BUCK1_UVOV, + TPS65224_IRQ_BUCK2_UVOV, + TPS65224_IRQ_BUCK3_UVOV, + TPS65224_IRQ_BUCK4_UVOV, + /* INT_LDO_VMON register */ + TPS65224_IRQ_LDO1_UVOV, + TPS65224_IRQ_LDO2_UVOV, + TPS65224_IRQ_LDO3_UVOV, + TPS65224_IRQ_VCCA_UVOV, + TPS65224_IRQ_VMON1_UVOV, + TPS65224_IRQ_VMON2_UVOV, + /* INT_GPIO register */ + TPS65224_IRQ_GPIO1, + TPS65224_IRQ_GPIO2, + TPS65224_IRQ_GPIO3, + TPS65224_IRQ_GPIO4, + TPS65224_IRQ_GPIO5, + TPS65224_IRQ_GPIO6, + /* INT_STARTUP register */ + TPS65224_IRQ_VSENSE, + TPS65224_IRQ_ENABLE, + TPS65224_IRQ_PB_SHORT, + TPS65224_IRQ_FSD, + TPS65224_IRQ_SOFT_REBOOT, + /* INT_MISC register */ + TPS65224_IRQ_BIST_PASS, + TPS65224_IRQ_EXT_CLK, + TPS65224_IRQ_REG_UNLOCK, + TPS65224_IRQ_TWARN, + TPS65224_IRQ_PB_LONG, + TPS65224_IRQ_PB_FALL, + TPS65224_IRQ_PB_RISE, + TPS65224_IRQ_ADC_CONV_READY, + /* INT_MODERATE_ERR register */ + TPS65224_IRQ_TSD_ORD, + TPS65224_IRQ_BIST_FAIL, + TPS65224_IRQ_REG_CRC_ERR, + TPS65224_IRQ_RECOV_CNT, + /* INT_SEVERE_ERR register */ + TPS65224_IRQ_TSD_IMM, + TPS65224_IRQ_VCCA_OVP, + TPS65224_IRQ_PFSM_ERR, + TPS65224_IRQ_BG_XMON, + /* INT_FSM_ERR register */ + TPS65224_IRQ_IMM_SHUTDOWN, + TPS65224_IRQ_ORD_SHUTDOWN, + TPS65224_IRQ_MCU_PWR_ERR, + TPS65224_IRQ_SOC_PWR_ERR, + TPS65224_IRQ_COMM_ERR, + TPS65224_IRQ_I2C2_ERR, +}; + +#define TPS65224_IRQ_NAME_BUCK1_UVOV "buck1_uvov" +#define TPS65224_IRQ_NAME_BUCK2_UVOV "buck2_uvov" +#define TPS65224_IRQ_NAME_BUCK3_UVOV "buck3_uvov" +#define TPS65224_IRQ_NAME_BUCK4_UVOV "buck4_uvov" +#define TPS65224_IRQ_NAME_LDO1_UVOV "ldo1_uvov" +#define TPS65224_IRQ_NAME_LDO2_UVOV "ldo2_uvov" +#define TPS65224_IRQ_NAME_LDO3_UVOV "ldo3_uvov" +#define TPS65224_IRQ_NAME_VCCA_UVOV "vcca_uvov" +#define TPS65224_IRQ_NAME_VMON1_UVOV "vmon1_uvov" +#define TPS65224_IRQ_NAME_VMON2_UVOV "vmon2_uvov" +#define TPS65224_IRQ_NAME_GPIO1 "gpio1" +#define TPS65224_IRQ_NAME_GPIO2 "gpio2" 
+#define TPS65224_IRQ_NAME_GPIO3 "gpio3" +#define TPS65224_IRQ_NAME_GPIO4 "gpio4" +#define TPS65224_IRQ_NAME_GPIO5 "gpio5" +#define TPS65224_IRQ_NAME_GPIO6 "gpio6" +#define TPS65224_IRQ_NAME_VSENSE "vsense" +#define TPS65224_IRQ_NAME_ENABLE "enable" +#define TPS65224_IRQ_NAME_PB_SHORT "pb_short" +#define TPS65224_IRQ_NAME_FSD "fsd" +#define TPS65224_IRQ_NAME_SOFT_REBOOT "soft_reboot" +#define TPS65224_IRQ_NAME_BIST_PASS "bist_pass" +#define TPS65224_IRQ_NAME_EXT_CLK "ext_clk" +#define TPS65224_IRQ_NAME_REG_UNLOCK "reg_unlock" +#define TPS65224_IRQ_NAME_TWARN "twarn" +#define TPS65224_IRQ_NAME_PB_LONG "pb_long" +#define TPS65224_IRQ_NAME_PB_FALL "pb_fall" +#define TPS65224_IRQ_NAME_PB_RISE "pb_rise" +#define TPS65224_IRQ_NAME_ADC_CONV_READY "adc_conv_ready" +#define TPS65224_IRQ_NAME_TSD_ORD "tsd_ord" +#define TPS65224_IRQ_NAME_BIST_FAIL "bist_fail" +#define TPS65224_IRQ_NAME_REG_CRC_ERR "reg_crc_err" +#define TPS65224_IRQ_NAME_RECOV_CNT "recov_cnt" +#define TPS65224_IRQ_NAME_TSD_IMM "tsd_imm" +#define TPS65224_IRQ_NAME_VCCA_OVP "vcca_ovp" +#define TPS65224_IRQ_NAME_PFSM_ERR "pfsm_err" +#define TPS65224_IRQ_NAME_BG_XMON "bg_xmon" +#define TPS65224_IRQ_NAME_IMM_SHUTDOWN "imm_shutdown" +#define TPS65224_IRQ_NAME_ORD_SHUTDOWN "ord_shutdown" +#define TPS65224_IRQ_NAME_MCU_PWR_ERR "mcu_pwr_err" +#define TPS65224_IRQ_NAME_SOC_PWR_ERR "soc_pwr_err" +#define TPS65224_IRQ_NAME_COMM_ERR "comm_err" +#define TPS65224_IRQ_NAME_I2C2_ERR "i2c2_err" +#define TPS65224_IRQ_NAME_POWERUP "powerup" + /** * struct tps6594 - device private data structure * @@ -1014,7 +1337,9 @@ struct tps6594 { struct regmap_irq_chip_data *irq_data; }; -bool tps6594_is_volatile_reg(struct device *dev, unsigned int reg); +extern const struct regmap_access_table tps6594_volatile_table; +extern const struct regmap_access_table tps65224_volatile_table; + int tps6594_device_init(struct tps6594 *tps, bool enable_crc); #endif /* __LINUX_MFD_TPS6594_H */ diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index c062d91a67d9..85dc406173db 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -461,6 +461,7 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot) #define TWL4030_PM_MASTER_GLOBAL_TST 0xb6 +#define TWL6030_PHOENIX_DEV_ON 0x06 /*----------------------------------------------------------------------*/ /* Power bus message definitions */ diff --git a/include/linux/mhi.h b/include/linux/mhi.h index d0f9b522f328..b573f15762f8 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -320,12 +320,7 @@ struct mhi_controller_config { * @hw_ev_rings: Number of hardware event rings * @sw_ev_rings: Number of software event rings * @nr_irqs: Number of IRQ allocated by bus master (required) - * @family_number: MHI controller family number - * @device_number: MHI controller device number - * @major_version: MHI controller major revision number - * @minor_version: MHI controller minor revision number * @serial_number: MHI controller serial number obtained from BHI - * @oem_pk_hash: MHI controller OEM PK Hash obtained from BHI * @mhi_event: MHI event ring configurations table * @mhi_cmd: MHI command ring configurations table * @mhi_ctxt: MHI device context, shared memory between host and device @@ -358,6 +353,7 @@ struct mhi_controller_config { * @read_reg: Read a MHI register via the physical link (required) * @write_reg: Write a MHI register via the physical link (required) * @reset: Controller specific reset function (optional) + * @edl_trigger: CB function to trigger EDL mode (optional) * 
@buffer_len: Bounce buffer length * @index: Index of the MHI controller instance * @bounce_buf: Use of bounce buffer @@ -369,15 +365,6 @@ struct mhi_controller_config { * Fields marked as (required) need to be populated by the controller driver * before calling mhi_register_controller(). For the fields marked as (optional) * they can be populated depending on the usecase. - * - * The following fields are present for the purpose of implementing any device - * specific quirks or customizations for specific MHI revisions used in device - * by the controller drivers. The MHI stack will just populate these fields - * during mhi_register_controller(): - * family_number - * device_number - * major_version - * minor_version */ struct mhi_controller { struct device *cntrl_dev; @@ -408,12 +395,7 @@ struct mhi_controller { u32 hw_ev_rings; u32 sw_ev_rings; u32 nr_irqs; - u32 family_number; - u32 device_number; - u32 major_version; - u32 minor_version; u32 serial_number; - u32 oem_pk_hash[MHI_MAX_OEM_PK_HASH_SEGMENTS]; struct mhi_event *mhi_event; struct mhi_cmd *mhi_cmd; @@ -454,6 +436,7 @@ struct mhi_controller { void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 val); void (*reset)(struct mhi_controller *mhi_cntrl); + int (*edl_trigger)(struct mhi_controller *mhi_cntrl); size_t buffer_len; int index; @@ -649,13 +632,29 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl); int mhi_sync_power_up(struct mhi_controller *mhi_cntrl); /** - * mhi_power_down - Start MHI power down sequence + * mhi_power_down - Power down the MHI device and also destroy the + * 'struct device' for the channels associated with it. + * See also mhi_power_down_keep_dev() which is a variant + * of this API that keeps the 'struct device' for channels + * (useful during suspend/hibernation). * @mhi_cntrl: MHI controller * @graceful: Link is still accessible, so do a graceful shutdown process */ void mhi_power_down(struct mhi_controller *mhi_cntrl, bool graceful); /** + * mhi_power_down_keep_dev - Power down the MHI device but keep the 'struct + * device' for the channels associated with it. + * This is a variant of 'mhi_power_down()' and + * useful in scenarios such as suspend/hibernation + * where destroying of the 'struct device' is not + * needed. 
+ * @mhi_cntrl: MHI controller + * @graceful: Link is still accessible, so do a graceful shutdown process + */ +void mhi_power_down_keep_dev(struct mhi_controller *mhi_cntrl, bool graceful); + +/** * mhi_unprepare_after_power_down - Free any allocated memory after power down * @mhi_cntrl: MHI controller */ @@ -817,4 +816,13 @@ int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir, */ bool mhi_queue_is_full(struct mhi_device *mhi_dev, enum dma_data_direction dir); +/** + * mhi_get_channel_doorbell_offset - Get the channel doorbell offset + * @mhi_cntrl: MHI controller + * @chdb_offset: Read channel doorbell offset + * + * Return: 0 if the read succeeds, a negative error code otherwise + */ +int mhi_get_channel_doorbell_offset(struct mhi_controller *mhi_cntrl, u32 *chdb_offset); + #endif /* _MHI_H_ */ diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 44077837385f..d52daf45861b 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -35,31 +35,33 @@ static __always_inline void min_heapify(struct min_heap *heap, int pos, const struct min_heap_callbacks *func) { - void *left, *right, *parent, *smallest; + void *left, *right; void *data = heap->data; + void *root = data + pos * func->elem_size; + int i = pos, j; + /* Find the sift-down path all the way to the leaves. */ for (;;) { - if (pos * 2 + 1 >= heap->nr) + if (i * 2 + 2 >= heap->nr) break; + left = data + (i * 2 + 1) * func->elem_size; + right = data + (i * 2 + 2) * func->elem_size; + i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; + } - left = data + ((pos * 2 + 1) * func->elem_size); - parent = data + (pos * func->elem_size); - smallest = parent; - if (func->less(left, smallest)) - smallest = left; - - if (pos * 2 + 2 < heap->nr) { - right = data + ((pos * 2 + 2) * func->elem_size); - if (func->less(right, smallest)) - smallest = right; - } - if (smallest == parent) - break; - func->swp(smallest, parent); - if (smallest == left) - pos = (pos * 2) + 1; - else - pos = (pos * 2) + 2; + /* Special case for the last leaf with no sibling. */ + if (i * 2 + 2 == heap->nr) + i = i * 2 + 1; + + /* Backtrack to the correct location. */ + while (i != pos && func->less(root, data + i * func->elem_size)) + i = (i - 1) / 2; + + /* Shift the element into its correct place. 
*/ + j = i; + while (i != pos) { + i = (i - 1) / 2; + func->swp(data + i * func->elem_size, data + j * func->elem_size); } } @@ -70,7 +72,7 @@ void min_heapify_all(struct min_heap *heap, { int i; - for (i = heap->nr / 2; i >= 0; i--) + for (i = heap->nr / 2 - 1; i >= 0; i--) min_heapify(heap, i, func); } diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index cb15308b5cb0..991526039ccb 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -95,9 +95,10 @@ enum { }; enum { - MLX5_CQ_MODIFY_PERIOD = 1 << 0, - MLX5_CQ_MODIFY_COUNT = 1 << 1, - MLX5_CQ_MODIFY_OVERRUN = 1 << 2, + MLX5_CQ_MODIFY_PERIOD = BIT(0), + MLX5_CQ_MODIFY_COUNT = BIT(1), + MLX5_CQ_MODIFY_OVERRUN = BIT(2), + MLX5_CQ_MODIFY_PERIOD_MODE = BIT(4), }; enum { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 01275c6e8468..d7bb31d9a446 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -68,7 +68,7 @@ #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8) #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32) #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) -#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld))) +#define MLX5_ADDR_OF(typ, p, fld) ((void *)((u8 *)(p) + MLX5_BYTE_OFF(typ, fld))) /* insert a value to a struct */ #define MLX5_SET(typ, p, fld, v) do { \ @@ -1336,6 +1336,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_ESW_FT_FIELD_SUPPORT_2(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE(mdev, ft_field_support_2_esw_fdb.cap) +#define MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, ft_field_support_2_nic_receive.cap) + #define MLX5_CAP_ESW(mdev, cap) \ MLX5_GET(e_switch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH]->cur, cap) @@ -1359,6 +1362,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) +#define MLX5_CAP_PORT_SELECTION_FT_FIELD_SUPPORT_2(mdev, cap) \ + MLX5_CAP_PORT_SELECTION(mdev, ft_field_support_2_port_selection.cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 41f03b352401..779cfdf2e9d6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -85,7 +85,7 @@ enum mlx5_sqp_t { }; enum { - MLX5_MAX_PORTS = 4, + MLX5_MAX_PORTS = 8, }; enum { @@ -823,6 +823,7 @@ struct mlx5_core_dev { struct blocking_notifier_head macsec_nh; #endif u64 num_ipsec_offloads; + struct mlx5_sd *sd; }; struct mlx5_db { @@ -861,6 +862,7 @@ struct mlx5_cmd_work_ent { void *context; int idx; struct completion handling; + struct completion slotted; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; @@ -1373,11 +1375,4 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) enum { MLX5_OCTWORD = 16, }; - -struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, - irqreturn_t (*handler)(int, void *), - const struct irq_affinity_desc *affdesc, - const char *name); -void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map); - #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 486b7492050c..f468763478ae 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -416,7 +416,10 @@ struct mlx5_ifc_flow_table_fields_supported_bits { /* Table 2170 - Flow Table Fields Supported 2 Format */ struct 
mlx5_ifc_flow_table_fields_supported_2_bits { - u8 reserved_at_0[0xe]; + u8 reserved_at_0[0x2]; + u8 inner_l4_type[0x1]; + u8 outer_l4_type[0x1]; + u8 reserved_at_4[0xa]; u8 bth_opcode[0x1]; u8 reserved_at_f[0x1]; u8 tunnel_header_0_1[0x1]; @@ -525,6 +528,12 @@ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits { u8 reserved_at_0[0x80]; }; +enum { + MLX5_PACKET_L4_TYPE_NONE, + MLX5_PACKET_L4_TYPE_TCP, + MLX5_PACKET_L4_TYPE_UDP, +}; + struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 smac_47_16[0x20]; @@ -550,7 +559,8 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 tcp_sport[0x10]; u8 tcp_dport[0x10]; - u8 reserved_at_c0[0x10]; + u8 l4_type[0x2]; + u8 reserved_at_c2[0xe]; u8 ipv4_ihl[0x4]; u8 reserved_at_c4[0x4]; @@ -846,7 +856,11 @@ struct mlx5_ifc_flow_table_nic_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer; - u8 reserved_at_e00[0x700]; + u8 reserved_at_e00[0x600]; + + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive; + + u8 reserved_at_1480[0x80]; struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive_rdma; @@ -876,7 +890,9 @@ struct mlx5_ifc_port_selection_cap_bits { struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_port_selection; - u8 reserved_at_400[0x7c00]; + struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_port_selection; + + u8 reserved_at_480[0x7b80]; }; enum { @@ -1469,7 +1485,9 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x10]; + u8 reserved_at_0[0x6]; + u8 page_request_disable[0x1]; + u8 reserved_at_7[0x9]; u8 shared_object_to_user_object_allowed[0x1]; u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; @@ -1668,7 +1686,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cq_oi[0x1]; u8 cq_resize[0x1]; u8 cq_moderation[0x1]; - u8 reserved_at_223[0x3]; + u8 cq_period_mode_modify[0x1]; + u8 reserved_at_224[0x2]; u8 cq_eq_remap[0x1]; u8 pg[0x1]; u8 block_lb_mc[0x1]; @@ -2004,7 +2023,13 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_3a0[0x10]; u8 max_rqt_vhca_id[0x10]; - u8 reserved_at_3c0[0x440]; + u8 reserved_at_3c0[0x20]; + + u8 reserved_at_3e0[0x10]; + u8 pcc_ifa2[0x1]; + u8 reserved_at_3f1[0xf]; + + u8 reserved_at_400[0x400]; }; enum mlx5_ifc_flow_destination_type { @@ -4361,10 +4386,10 @@ enum { MLX5_CQC_ST_FIRED = 0xa, }; -enum { +enum mlx5_cq_period_mode { MLX5_CQ_PERIOD_MODE_START_FROM_EQE = 0x0, MLX5_CQ_PERIOD_MODE_START_FROM_CQE = 0x1, - MLX5_CQ_PERIOD_NUM_MODES + MLX5_CQ_PERIOD_NUM_MODES, }; struct mlx5_ifc_cqc_bits { @@ -9793,7 +9818,21 @@ struct mlx5_ifc_pplm_reg_bits { u8 fec_override_admin_100g_2x[0x10]; u8 fec_override_admin_50g_1x[0x10]; - u8 reserved_at_140[0x140]; + u8 fec_override_cap_800g_8x[0x10]; + u8 fec_override_cap_400g_4x[0x10]; + + u8 fec_override_cap_200g_2x[0x10]; + u8 fec_override_cap_100g_1x[0x10]; + + u8 reserved_at_180[0xa0]; + + u8 fec_override_admin_800g_8x[0x10]; + u8 fec_override_admin_400g_4x[0x10]; + + u8 fec_override_admin_200g_2x[0x10]; + u8 fec_override_admin_100g_1x[0x10]; + + u8 reserved_at_260[0x20]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -10165,7 +10204,9 @@ struct mlx5_ifc_mtutc_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x68]; + u8 reserved_at_0[0x48]; + u8 fec_100G_per_lane_in_pplm[0x1]; + u8 reserved_at_49[0x1f]; u8 fec_50G_per_lane_in_pplm[0x1]; u8 reserved_at_69[0x4]; u8 rx_icrc_encapsulated_counter[0x1]; @@ -10253,7 +10294,9 @@ struct mlx5_ifc_mcam_access_reg_bits { u8 mcqi[0x1]; u8 mcqs[0x1]; - u8 regs_95_to_87[0x9]; + u8 
regs_95_to_90[0x6]; + u8 mpir[0x1]; + u8 regs_88_to_87[0x2]; u8 mpegc[0x1]; u8 mtutc[0x1]; u8 regs_84_to_68[0x11]; @@ -10663,6 +10706,7 @@ enum { MLX5_INITIAL_SEG_NIC_INTERFACE_FULL_DRIVER = 0x0, MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED = 0x1, MLX5_INITIAL_SEG_NIC_INTERFACE_NO_DRAM_NIC = 0x2, + MLX5_INITIAL_SEG_NIC_INTERFACE_SW_RESET = 0x7, }; enum { @@ -12674,6 +12718,11 @@ struct mlx5_ifc_modify_page_track_obj_in_bits { struct mlx5_ifc_page_track_bits obj_context; }; +struct mlx5_ifc_query_page_track_obj_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + struct mlx5_ifc_page_track_bits obj_context; +}; + struct mlx5_ifc_msecq_reg_bits { u8 reserved_at_0[0x20]; @@ -12707,6 +12756,14 @@ enum mlx5_msees_oper_status { MLX5_MSEES_OPER_STATUS_FAIL_FREE_RUNNING = 0x5, }; +enum mlx5_msees_failure_reason { + MLX5_MSEES_FAILURE_REASON_UNDEFINED_ERROR = 0x0, + MLX5_MSEES_FAILURE_REASON_PORT_DOWN = 0x1, + MLX5_MSEES_FAILURE_REASON_TOO_HIGH_FREQUENCY_DIFF = 0x2, + MLX5_MSEES_FAILURE_REASON_NET_SYNCHRONIZER_DEVICE_ERROR = 0x3, + MLX5_MSEES_FAILURE_REASON_LACK_OF_RESOURCES = 0x4, +}; + struct mlx5_ifc_msees_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; diff --git a/include/linux/mm.h b/include/linux/mm.h index f5a97dec5169..9849dfda44d4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,7 @@ #include <linux/errno.h> #include <linux/mmdebug.h> #include <linux/gfp.h> +#include <linux/pgalloc_tag.h> #include <linux/bug.h> #include <linux/list.h> #include <linux/mmzone.h> @@ -36,6 +37,7 @@ struct anon_vma; struct anon_vma_chain; struct user_struct; struct pt_regs; +struct folio_batch; extern int sysctl_page_lock_unfairness; @@ -86,7 +88,7 @@ extern int sysctl_legacy_va_layout; #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS extern const int mmap_rnd_bits_min; -extern const int mmap_rnd_bits_max; +extern int mmap_rnd_bits_max __ro_after_init; extern int mmap_rnd_bits __read_mostly; #endif #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS @@ -226,7 +228,6 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */ #define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) static inline struct folio *lru_to_folio(struct list_head *head) { return list_entry((head)->prev, struct folio, lru); @@ -391,6 +392,20 @@ extern unsigned int kobjsize(const void *objp); # define VM_UFFD_MINOR VM_NONE #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ +/* + * This flag is used to connect VFIO to arch specific KVM code. It + * indicates that the memory under this VMA is safe for use with any + * non-cachable memory type inside KVM. Some VFIO devices, on some + * platforms, are thought to be unsafe and can cause machine crashes + * if KVM does not lock down the memory type. 
+ */ +#ifdef CONFIG_64BIT +#define VM_ALLOW_ANY_UNCACHED_BIT 39 +#define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT) +#else +#define VM_ALLOW_ANY_UNCACHED VM_NONE +#endif + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) @@ -781,6 +796,11 @@ static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, return NULL; } +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); +} + static inline void release_fault_lock(struct vm_fault *vmf) { mmap_read_unlock(vmf->vma->vm_mm); @@ -1178,9 +1198,9 @@ static inline int is_vmalloc_or_module_addr(const void *x) * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for * debugging purposes - it does not include PTE-mapped sub-pages; look - * at folio_mapcount() or page_mapcount() or total_mapcount() instead. + * at folio_mapcount() or page_mapcount() instead. */ -static inline int folio_entire_mapcount(struct folio *folio) +static inline int folio_entire_mapcount(const struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); return atomic_read(&folio->_entire_mapcount) + 1; } @@ -1204,55 +1224,60 @@ static inline void page_mapcount_reset(struct page *page) * a large folio, it includes the number of times this page is mapped * as part of that folio. * - * The result is undefined for pages which cannot be mapped into userspace. - * For example SLAB or special types of pages. See function page_has_type(). - * They use this field in struct page differently. + * Will report 0 for pages which cannot be mapped into userspace, eg + * slab, page tables and similar. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + mapcount = 0; if (unlikely(PageCompound(page))) mapcount += folio_entire_mapcount(page_folio(page)); return mapcount; } -int folio_total_mapcount(struct folio *folio); +static inline int folio_large_mapcount(const struct folio *folio) +{ + VM_WARN_ON_FOLIO(!folio_test_large(folio), folio); + return atomic_read(&folio->_large_mapcount) + 1; +} /** - * folio_mapcount() - Calculate the number of mappings of this folio. + * folio_mapcount() - Number of mappings of this folio. * @folio: The folio. * - * A large folio tracks both how many times the entire folio is mapped, - * and how many times each individual page in the folio is mapped. - * This function calculates the total number of times the folio is - * mapped. + * The folio mapcount corresponds to the number of present user page table + * entries that reference any part of a folio. Each such present user page + * table entry must be paired with exactly one folio reference. + * + * For ordinary folios, each user page table entry (PTE/PMD/PUD/...) counts + * exactly once. + * + * For hugetlb folios, each abstracted "hugetlb" user page table entry that + * references the entire folio counts exactly once, even when such special + * page table entries are comprised of multiple ordinary page table entries. + * + * Will report 0 for pages which cannot be mapped into userspace, such as + * slab, page tables and similar. * * Return: The number of times this folio is mapped.
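To make the relationship between the accessors in this hunk concrete, here is a small debugging-style sketch; the helper name is invented for illustration, and it only uses definitions shown in this patch.

#include <linux/mm.h>
#include <linux/printk.h>

/* Hypothetical helper: print the different mapcount views of a folio. */
static void demo_dump_mapcounts(const struct folio *folio)
{
	/* Present user page table entries referencing any part of the folio. */
	pr_info("mapcount:        %d\n", folio_mapcount(folio));

	if (folio_test_large(folio)) {
		/* How often the folio is mapped as one PMD/PUD-sized unit. */
		pr_info("entire mapcount: %d\n", folio_entire_mapcount(folio));
		/* The counter folio_mapcount() reads for large folios. */
		pr_info("large mapcount:  %d\n", folio_large_mapcount(folio));
	}
}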
*/ -static inline int folio_mapcount(struct folio *folio) -{ - if (likely(!folio_test_large(folio))) - return atomic_read(&folio->_mapcount) + 1; - return folio_total_mapcount(folio); -} - -static inline int total_mapcount(struct page *page) +static inline int folio_mapcount(const struct folio *folio) { - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) + 1; - return folio_total_mapcount(page_folio(page)); -} + int mapcount; -static inline bool folio_large_is_mapped(struct folio *folio) -{ - /* - * Reading _entire_mapcount below could be omitted if hugetlb - * participated in incrementing nr_pages_mapped when compound mapped. - */ - return atomic_read(&folio->_nr_pages_mapped) > 0 || - atomic_read(&folio->_entire_mapcount) >= 0; + if (likely(!folio_test_large(folio))) { + mapcount = atomic_read(&folio->_mapcount) + 1; + /* Handle page_has_type() pages */ + if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) + mapcount = 0; + return mapcount; + } + return folio_large_mapcount(folio); } /** @@ -1261,11 +1286,9 @@ static inline bool folio_large_is_mapped(struct folio *folio) * * Return: True if any page in this folio is referenced by user page tables. */ -static inline bool folio_mapped(struct folio *folio) +static inline bool folio_mapped(const struct folio *folio) { - if (likely(!folio_test_large(folio))) - return atomic_read(&folio->_mapcount) >= 0; - return folio_large_is_mapped(folio); + return folio_mapcount(folio) >= 1; } /* @@ -1273,11 +1296,9 @@ static inline bool folio_mapped(struct folio *folio) * For compound page it returns true if any sub-page of compound page is mapped, * even if this particular sub-page is not itself mapped by any PTE or PMD. */ -static inline bool page_mapped(struct page *page) +static inline bool page_mapped(const struct page *page) { - if (likely(!PageCompound(page))) - return atomic_read(&page->_mapcount) >= 0; - return folio_large_is_mapped(page_folio(page)); + return folio_mapped(page_folio(page)); } static inline struct page *virt_to_head_page(const void *x) @@ -1303,8 +1324,6 @@ void folio_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); -void destroy_large_folio(struct folio *folio); - /* Returns the number of bytes in this potentially compound page. 
*/ static inline unsigned long page_size(struct page *page) { @@ -1422,27 +1441,22 @@ vm_fault_t finish_fault(struct vm_fault *vmf); #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) DECLARE_STATIC_KEY_FALSE(devmap_managed_key); -bool __put_devmap_managed_page_refs(struct page *page, int refs); -static inline bool put_devmap_managed_page_refs(struct page *page, int refs) +bool __put_devmap_managed_folio_refs(struct folio *folio, int refs); +static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) { if (!static_branch_unlikely(&devmap_managed_key)) return false; - if (!is_zone_device_page(page)) + if (!folio_is_zone_device(folio)) return false; - return __put_devmap_managed_page_refs(page, refs); + return __put_devmap_managed_folio_refs(folio, refs); } #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_page_refs(struct page *page, int refs) +static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) { return false; } #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_page(struct page *page) -{ - return put_devmap_managed_page_refs(page, 1); -} - /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) @@ -1514,6 +1528,8 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } +void folios_put_refs(struct folio_batch *folios, unsigned int *refs); + /* * union release_pages_arg - an array of pages or folios * @@ -1536,18 +1552,19 @@ void release_pages(release_pages_arg, int nr); /** * folios_put - Decrement the reference count on an array of folios. * @folios: The folios. - * @nr: How many folios there are. * - * Like folio_put(), but for an array of folios. This is more efficient - * than writing the loop yourself as it will optimise the locks which - * need to be taken if the folios are freed. + * Like folio_put(), but for a batch of folios. This is more efficient + * than writing the loop yourself as it will optimise the locks which need + * to be taken if the folios are freed. The folios batch is returned + * empty and ready to be reused for another batch; there is no need to + * reinitialise it. * * Context: May be called in process or interrupt context, but not in NMI * context. May be called while holding a spinlock. */ -static inline void folios_put(struct folio **folios, unsigned int nr) +static inline void folios_put(struct folio_batch *folios) { - release_pages(folios, nr); + folios_put_refs(folios, NULL); } static inline void put_page(struct page *page) @@ -1558,7 +1575,7 @@ static inline void put_page(struct page *page) * For some devmap managed pages we need to catch refcount transition * from 2 to 1: */ - if (put_devmap_managed_page(&folio->page)) + if (put_devmap_managed_folio_refs(folio, 1)) return; folio_put(folio); } @@ -1640,13 +1657,11 @@ static inline int page_zone_id(struct page *page) } #ifdef NODE_NOT_IN_PAGE_FLAGS -extern int page_to_nid(const struct page *page); +int page_to_nid(const struct page *page); #else static inline int page_to_nid(const struct page *page) { - struct page *p = (struct page *)page; - - return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK; + return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK; } #endif @@ -2054,7 +2069,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, * * Return: A positive power of two. 
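The folios_put() conversion in this hunk replaces the bare array-plus-count interface with a struct folio_batch that is flushed and then reused. A hedged caller-side sketch of the intended pattern follows; the list-draining function itself is invented for illustration.

#include <linux/list.h>
#include <linux/mm.h>
#include <linux/pagevec.h>

/* Sketch: drop one reference on every folio on a private list, in batches. */
static void demo_put_folio_list(struct list_head *list)
{
	struct folio_batch fbatch;
	struct folio *folio, *next;

	folio_batch_init(&fbatch);
	list_for_each_entry_safe(folio, next, list, lru) {
		list_del(&folio->lru);
		/* folio_batch_add() returns the space left; 0 means full. */
		if (!folio_batch_add(&fbatch, folio))
			folios_put(&fbatch);	/* flush; batch comes back empty */
	}
	if (folio_batch_count(&fbatch))
		folios_put(&fbatch);
}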
*/ -static inline long folio_nr_pages(struct folio *folio) +static inline long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; @@ -2065,6 +2080,13 @@ static inline long folio_nr_pages(struct folio *folio) #endif } +/* Only hugetlbfs can allocate folios larger than MAX_ORDER */ +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE +#define MAX_FOLIO_NR_PAGES (1UL << PUD_ORDER) +#else +#define MAX_FOLIO_NR_PAGES MAX_ORDER_NR_PAGES +#endif + /* * compound_nr() returns the number of pages in this potentially compound * page. compound_nr() can be called on a tail page, and is defined to @@ -2142,21 +2164,64 @@ static inline size_t folio_size(struct folio *folio) } /** - * folio_estimated_sharers - Estimate the number of sharers of a folio. + * folio_likely_mapped_shared - Estimate if the folio is mapped into the page + * tables of more than one MM * @folio: The folio. * - * folio_estimated_sharers() aims to serve as a function to efficiently - * estimate the number of processes sharing a folio. This is done by - * looking at the precise mapcount of the first subpage in the folio, and - * assuming the other subpages are the same. This may not be true for large - * folios. If you want exact mapcounts for exact calculations, look at - * page_mapcount() or folio_total_mapcount(). + * This function checks if the folio is currently mapped into more than one + * MM ("mapped shared"), or if the folio is only mapped into a single MM + * ("mapped exclusively"). * - * Return: The estimated number of processes sharing a folio. + * As precise information is not easily available for all folios, this function + * estimates the number of MMs ("sharers") that are currently mapping a folio + * using the number of times the first page of the folio is currently mapped + * into page tables. + * + * For small anonymous folios (except KSM folios) and anonymous hugetlb folios, + * the return value will be exactly correct, because they can only be mapped + * at most once into an MM, and they cannot be partially mapped. + * + * For other folios, the result can be fuzzy: + * #. For partially-mappable large folios (THP), the return value can wrongly + * indicate "mapped exclusively" (false negative) when the folio is + * only partially mapped into at least one MM. + * #. For pagecache folios (including hugetlb), the return value can wrongly + * indicate "mapped shared" (false positive) when two VMAs in the same MM + * cover the same file range. + * #. For (small) KSM folios, the return value can wrongly indicate "mapped + * shared" (false positive), when the folio is mapped multiple times into + * the same MM. + * + * Further, this function only considers current page table mappings that + * are tracked using the folio mapcount(s). + * + * This function does not consider: + * #. If the folio might get mapped in the (near) future (e.g., swapcache, + * pagecache, temporary unmapping for migration). + * #. If the folio is mapped differently (VM_PFNMAP). + * #. If hugetlb page table sharing applies. Callers might want to check + * hugetlb_pmd_shared(). + * + * Return: Whether the folio is estimated to be mapped into more than one MM. */ -static inline int folio_estimated_sharers(struct folio *folio) +static inline bool folio_likely_mapped_shared(struct folio *folio) { - return page_mapcount(folio_page(folio, 0)); + int mapcount = folio_mapcount(folio); + + /* Only partially-mappable folios require more care. 
*/ + if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio))) + return mapcount > 1; + + /* A single mapping implies "mapped exclusively". */ + if (mapcount <= 1) + return false; + + /* If any page is mapped more than once we treat it "mapped shared". */ + if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio)) + return true; + + /* Let's guess based on the first subpage. */ + return atomic_read(&folio->_mapcount) > 0; } #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE @@ -2187,11 +2252,6 @@ static inline int arch_make_folio_accessible(struct folio *folio) */ #include <linux/vmstat.h> -static __always_inline void *lowmem_page_address(const struct page *page) -{ - return page_to_virt(page); -} - #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL #endif @@ -2214,6 +2274,11 @@ void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif +static __always_inline void *lowmem_page_address(const struct page *page) +{ + return page_to_virt(page); +} + #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) #define page_address(page) lowmem_page_address(page) #define set_page_address(page, address) do { } while(0) @@ -2371,12 +2436,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); -int follow_pte(struct mm_struct *mm, unsigned long address, +int follow_pte(struct vm_area_struct *vma, unsigned long address, pte_t **ptepp, spinlock_t **ptlp); -int follow_pfn(struct vm_area_struct *vma, unsigned long address, - unsigned long *pfn); -int follow_phys(struct vm_area_struct *vma, unsigned long address, - unsigned int flags, unsigned long *prot, resource_size_t *phys); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); @@ -2530,7 +2591,7 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, MM_CP_UFFD_WP_RESOLVE) bool vma_needs_dirty_tracking(struct vm_area_struct *vma); -int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); +bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) { /* @@ -2595,19 +2656,19 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member) mm_trace_rss_stat(mm, member); } -/* Optimized variant when page is already known not to be PageAnon */ -static inline int mm_counter_file(struct page *page) +/* Optimized variant when folio is already known not to be anon */ +static inline int mm_counter_file(struct folio *folio) { - if (PageSwapBacked(page)) + if (folio_test_swapbacked(folio)) return MM_SHMEMPAGES; return MM_FILEPAGES; } -static inline int mm_counter(struct page *page) +static inline int mm_counter(struct folio *folio) { - if (PageAnon(page)) + if (folio_test_anon(folio)) return MM_ANONPAGES; - return mm_counter_file(page); + return mm_counter_file(folio); } static inline unsigned long get_mm_rss(struct mm_struct *mm) @@ -2837,12 +2898,13 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt) * * Return: The ptdesc describing the allocated page tables. 
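The hunk that follows converts pagetable_alloc() to the memory-allocation-profiling convention used across this merge: the real body moves to a _noprof variant, and the public name becomes an alloc_hooks() wrapper so the pages are charged to the caller's code tag. A generic sketch of the idiom, with demo_alloc() as a hypothetical name:

#include <linux/alloc_tag.h>
#include <linux/gfp.h>

/* The _noprof implementation allocates without recording a code tag. */
static inline void *demo_alloc_noprof(gfp_t gfp, unsigned int order)
{
	return (void *)__get_free_pages_noprof(gfp, order);
}

/* alloc_hooks() accounts the allocation to the *caller's* code tag when
 * CONFIG_MEM_ALLOC_PROFILING is enabled, and compiles away otherwise. */
#define demo_alloc(...)	alloc_hooks(demo_alloc_noprof(__VA_ARGS__))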
*/ -static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) +static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order) { - struct page *page = alloc_pages(gfp | __GFP_COMP, order); + struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order); return page_ptdesc(page); } +#define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__)) /** * pagetable_free - Free pagetables @@ -3112,6 +3174,14 @@ extern void reserve_bootmem_region(phys_addr_t start, /* Free the reserved page into the buddy system, so it gets managed. */ static inline void free_reserved_page(struct page *page) { + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } ClearPageReserved(page); init_page_count(page); __free_page(page); @@ -3173,8 +3243,6 @@ static inline unsigned long get_num_physpages(void) */ void free_area_init(unsigned long *max_zone_pfn); unsigned long node_map_pfn_alignment(void); -unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, - unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, @@ -3190,7 +3258,6 @@ static inline int early_pfn_to_nid(unsigned long pfn) extern int __meminit early_pfn_to_nid(unsigned long pfn); #endif -extern void set_dma_reserve(unsigned long new_dma_reserve); extern void mem_init(void); extern void __init mmap_init(void); @@ -3202,9 +3269,6 @@ static inline void show_mem(void) extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); -#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES -extern unsigned long arch_reserved_kernel_pages(void); -#endif extern __printf(3, 4) void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...); @@ -3363,7 +3427,16 @@ extern int install_special_mapping(struct mm_struct *mm, unsigned long randomize_stack_top(unsigned long stack_top); unsigned long randomize_page(unsigned long start, unsigned long range); -extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); +unsigned long +__get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags); + +static inline unsigned long +get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + return __get_unmapped_area(file, addr, len, pgoff, flags, 0); +} extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, @@ -3409,6 +3482,7 @@ struct vm_unmapped_area_info { unsigned long high_limit; unsigned long align_mask; unsigned long align_offset; + unsigned long start_gap; }; extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); @@ -3702,7 +3776,14 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, - &init_on_free); + &init_on_free); +} + +DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); +static inline bool want_init_mlocked_on_free(void) +{ + return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, + &init_mlocked_on_free); } extern bool 
_debug_pagealloc_enabled_early; @@ -3765,24 +3846,22 @@ static inline bool page_is_guard(struct page *page) return PageGuard(page); } -bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype); +bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order); static inline bool set_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) + unsigned int order) { if (!debug_guardpage_enabled()) return false; - return __set_page_guard(zone, page, order, migratetype); + return __set_page_guard(zone, page, order); } -void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order, - int migratetype); +void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order); static inline void clear_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) + unsigned int order) { if (!debug_guardpage_enabled()) return; - __clear_page_guard(zone, page, order, migratetype); + __clear_page_guard(zone, page, order); } #else /* CONFIG_DEBUG_PAGEALLOC */ @@ -3792,9 +3871,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; } static inline bool debug_guardpage_enabled(void) { return false; } static inline bool page_is_guard(struct page *page) { return false; } static inline bool set_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) { return false; } + unsigned int order) { return false; } static inline void clear_page_guard(struct zone *zone, struct page *page, - unsigned int order, int migratetype) {} + unsigned int order) {} #endif /* CONFIG_DEBUG_PAGEALLOC */ #ifdef __HAVE_ARCH_GATE_AREA @@ -3949,7 +4028,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); -extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE @@ -4182,4 +4260,7 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn) return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE); } +void vma_pgtable_walk_begin(struct vm_area_struct *vma); +void vma_pgtable_walk_end(struct vm_area_struct *vma); + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8b611e13153e..24323c7d0bd4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -169,7 +169,7 @@ struct page { /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; -#ifdef CONFIG_MEMCG +#ifdef CONFIG_SLAB_OBJ_EXT unsigned long memcg_data; #endif @@ -210,8 +210,8 @@ struct page { /* * An 'encoded_page' pointer is a pointer to a regular 'struct page', but * with the low bits of the pointer indicating extra context-dependent - * information. Not super-common, but happens in mmu_gather and mlock - * handling, and this acts as a type system check on that use. + * information. Only used in mmu_gather handling, and this acts as a type + * system check on that use. * * We only really have two guaranteed bits in general, although you could * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) @@ -220,21 +220,46 @@ * Use the supplied helper functions to encode/decode the pointer and bits.
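A short round-trip sketch for the encode/decode helpers defined in the remainder of this hunk; the function is illustrative only, but the names all come from the definitions below.

#include <linux/bug.h>
#include <linux/mm_types.h>

/* Stash a flag in the low bits of a page pointer, then recover both parts. */
static void demo_encoded_page_roundtrip(struct page *page)
{
	struct encoded_page *ep = encode_page(page, ENCODED_PAGE_BIT_DELAY_RMAP);

	/* The flag travels in the low bits of the pointer... */
	WARN_ON(!(encoded_page_flags(ep) & ENCODED_PAGE_BIT_DELAY_RMAP));
	/* ...and masking them off recovers the original struct page. */
	WARN_ON(encoded_page_ptr(ep) != page);
}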
*/ struct encoded_page; -#define ENCODE_PAGE_BITS 3ul + +#define ENCODED_PAGE_BITS 3ul + +/* Perform rmap removal after we have flushed the TLB. */ +#define ENCODED_PAGE_BIT_DELAY_RMAP 1ul + +/* + * The next item in an encoded_page array is the "nr_pages" argument, specifying + * the number of consecutive pages starting from this page, that all belong to + * the same folio. For example, "nr_pages" corresponds to the number of folio + * references that must be dropped. If this bit is not set, "nr_pages" is + * implicitly 1. + */ +#define ENCODED_PAGE_BIT_NR_PAGES_NEXT 2ul + static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) { - BUILD_BUG_ON(flags > ENCODE_PAGE_BITS); + BUILD_BUG_ON(flags > ENCODED_PAGE_BITS); return (struct encoded_page *)(flags | (unsigned long)page); } static inline unsigned long encoded_page_flags(struct encoded_page *page) { - return ENCODE_PAGE_BITS & (unsigned long)page; + return ENCODED_PAGE_BITS & (unsigned long)page; } static inline struct page *encoded_page_ptr(struct encoded_page *page) { - return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page); + return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page); +} + +static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr) +{ + VM_WARN_ON_ONCE((nr << 2) >> 2 != nr); + return (struct encoded_page *)(nr << 2); +} + +static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page) +{ + return ((unsigned long)page) >> 2; } /* @@ -264,7 +289,8 @@ typedef struct { * @virtual: Virtual address in the kernel direct map. * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). - * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). + * @_large_mapcount: Do not use directly, call folio_mapcount(). + * @_nr_pages_mapped: Do not use outside of rmap and debug code. * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. @@ -306,7 +332,7 @@ struct folio { }; atomic_t _mapcount; atomic_t _refcount; -#ifdef CONFIG_MEMCG +#ifdef CONFIG_SLAB_OBJ_EXT unsigned long memcg_data; #endif #if defined(WANT_PAGE_VIRTUAL) @@ -323,8 +349,8 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; - unsigned long _folio_avail; /* public: */ + atomic_t _large_mapcount; atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; @@ -394,12 +420,13 @@ FOLIO_MATCH(compound_head, _head_2a); /** * struct ptdesc - Memory descriptor for page tables. - * @__page_flags: Same as page flags. Unused for page tables. + * @__page_flags: Same as page flags. Powerpc only. * @pt_rcu_head: For freeing page table pages. * @pt_list: List of used page tables. Used for s390 and x86. * @_pt_pad_1: Padding that aliases with page's compound head. * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. * @__page_mapping: Aliases with page->mapping. Unused for page tables. + * @pt_index: Used for s390 gmap. * @pt_mm: Used for x86 pgds. * @pt_frag_refcount: For fragmented page table tracking. Powerpc only. * @_pt_pad_2: Padding to ensure proper alignment. 
@@ -425,6 +452,7 @@ struct ptdesc { unsigned long __page_mapping; union { + pgoff_t pt_index; struct mm_struct *pt_mm; atomic_t pt_frag_refcount; }; @@ -450,6 +478,7 @@ TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); +TABLE_MATCH(index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, __page_refcount); @@ -643,6 +672,9 @@ struct vm_area_struct { }; #ifdef CONFIG_PER_VMA_LOCK + /* Flag to indicate areas detached from the mm->mm_mt tree */ + bool detached; + /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) @@ -659,9 +691,6 @@ struct vm_area_struct { */ int vm_lock_seq; struct vma_lock *vm_lock; - - /* Flag to indicate areas detached from the mm->mm_mt tree */ - bool detached; #endif /* @@ -749,11 +778,7 @@ struct mm_struct { } ____cacheline_aligned_in_smp; struct maple_tree mm_mt; -#ifdef CONFIG_MMU - unsigned long (*get_unmapped_area) (struct file *filp, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags); -#endif + unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES @@ -1142,14 +1167,15 @@ static inline void mm_init_cid(struct mm_struct *mm) cpumask_clear(mm_cidmask(mm)); } -static inline int mm_alloc_cid(struct mm_struct *mm) +static inline int mm_alloc_cid_noprof(struct mm_struct *mm) { - mm->pcpu_cid = alloc_percpu(struct mm_cid); + mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; mm_init_cid(mm); return 0; } +#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) static inline void mm_destroy_cid(struct mm_struct *mm) { @@ -1342,6 +1368,15 @@ enum fault_flag { typedef unsigned int __bitwise zap_flags_t; +/* Flags for clear_young_dirty_ptes(). */ +typedef int __bitwise cydp_t; + +/* Clear the access bit */ +#define CYDP_CLEAR_YOUNG ((__force cydp_t)BIT(0)) + +/* Clear the dirty bit */ +#define CYDP_CLEAR_DIRTY ((__force cydp_t)BIT(1)) + /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * other. Here is what they mean, and how to use them: diff --git a/include/linux/mman.h b/include/linux/mman.h index dc7048824be8..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags) unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. 
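The new hook uses the familiar define-guard idiom: linux/mman.h supplies a default returning true, and an architecture that cannot enforce MDWE overrides it in its own asm/mman.h before this fallback is seen. A hedged sketch of such an override for a hypothetical architecture:

/* In a hypothetical arch/foo/include/asm/mman.h, included ahead of the
 * generic fallback: report that MDWE cannot be enforced here, so the
 * PR_SET_MDWE prctl can reject the request instead of pretending. */
static inline bool arch_memory_deny_write_exec_supported(void)
{
	return false;
}
#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported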
* diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 8d38dcb6d044..de9dc20b01ba 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -60,16 +60,14 @@ static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write) #endif /* CONFIG_TRACING */ -static inline void mmap_assert_locked(struct mm_struct *mm) +static inline void mmap_assert_locked(const struct mm_struct *mm) { - lockdep_assert_held(&mm->mmap_lock); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); + rwsem_assert_held(&mm->mmap_lock); } -static inline void mmap_assert_write_locked(struct mm_struct *mm) +static inline void mmap_assert_write_locked(const struct mm_struct *mm) { - lockdep_assert_held_write(&mm->mmap_lock); - VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); + rwsem_assert_held_write(&mm->mmap_lock); } #ifdef CONFIG_PER_VMA_LOCK diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 2f445c651742..88c6a76042ee 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -433,8 +433,8 @@ struct mmc_host { mmc_pm_flag_t pm_caps; /* supported pm features */ /* host specific block data */ - unsigned int max_seg_size; /* see blk_queue_max_segment_size */ - unsigned short max_segs; /* see blk_queue_max_segments */ + unsigned int max_seg_size; /* lim->max_segment_size */ + unsigned short max_segs; /* lim->max_segments */ unsigned short unused; unsigned int max_req_size; /* maximum number of bytes in one req */ unsigned int max_blk_size; /* maximum size of one mmc block */ @@ -539,7 +539,7 @@ struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra); int mmc_add_host(struct mmc_host *); void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); -void mmc_of_parse_clk_phase(struct mmc_host *host, +void mmc_of_parse_clk_phase(struct device *dev, struct mmc_clk_phase_map *map); int mmc_of_parse(struct mmc_host *host); int mmc_of_parse_voltage(struct mmc_host *host, u32 *mask); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 478855b8e406..fed1f5f4a8d3 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -106,7 +106,10 @@ struct sdio_driver { .class = (dev_class), \ .vendor = SDIO_ANY_ID, .device = SDIO_ANY_ID -extern int sdio_register_driver(struct sdio_driver *); +/* use a macro to avoid include chaining to get THIS_MODULE */ +#define sdio_register_driver(drv) \ + __sdio_register_driver(drv, THIS_MODULE) +extern int __sdio_register_driver(struct sdio_driver *, struct module *); extern void sdio_unregister_driver(struct sdio_driver *); /** diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 7fada7a714fe..7cddfdac2f57 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -124,6 +124,7 @@ #define SDIO_DEVICE_ID_REALTEK_RTW8723DS_2ANT 0xd723 #define SDIO_DEVICE_ID_REALTEK_RTW8723DS_1ANT 0xd724 #define SDIO_DEVICE_ID_REALTEK_RTW8821DS 0xd821 +#define SDIO_DEVICE_ID_REALTEK_RTW8723CS 0xb703 #define SDIO_VENDOR_ID_SIANO 0x039a #define SDIO_DEVICE_ID_SIANO_NOVA_B0 0x0201 diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 5d3d15e97868..274a2767ea49 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -8,8 +8,8 @@ #ifndef MMC_SLOT_GPIO_H #define MMC_SLOT_GPIO_H +#include <linux/interrupt.h> #include <linux/types.h> -#include <linux/irqreturn.h> struct mmc_host; @@ -21,8 +21,8 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, 
unsigned int debounce); int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int idx, unsigned int debounce); -void mmc_gpio_set_cd_isr(struct mmc_host *host, - irqreturn_t (*isr)(int irq, void *dev_id)); +int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); +void mmc_gpio_set_cd_isr(struct mmc_host *host, irq_handler_t isr); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); bool mmc_can_gpio_cd(struct mmc_host *host); diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 7c3e7b0b0e8f..39a7714605a7 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -10,7 +10,7 @@ struct vm_area_struct; struct mm_struct; struct vma_iterator; -void dump_page(struct page *page, const char *reason); +void dump_page(const struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); void vma_iter_dump_tree(const struct vma_iterator *vmi); diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index f2b7a3f04099..bbaec80c78c5 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -11,7 +11,7 @@ #endif #ifndef leave_mm -static inline void leave_mm(int cpu) { } +static inline void leave_mm(void) { } #endif /* diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index f349e08a9dfe..d39ebb10caeb 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -123,15 +123,6 @@ struct mmu_notifier_ops { unsigned long address); /* - * change_pte is called in cases that pte mapping to page is changed: - * for example, when ksm remaps pte to point to a new shared page. - */ - void (*change_pte)(struct mmu_notifier *subscription, - struct mm_struct *mm, - unsigned long address, - pte_t pte); - - /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_lock and/or the * locks protecting the reverse maps are held. If the subsystem @@ -392,8 +383,6 @@ extern int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); -extern void __mmu_notifier_change_pte(struct mm_struct *mm, - unsigned long address, pte_t pte); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, @@ -439,13 +428,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm, return 0; } -static inline void mmu_notifier_change_pte(struct mm_struct *mm, - unsigned long address, pte_t pte) -{ - if (mm_has_notifiers(mm)) - __mmu_notifier_change_pte(mm, address, pte); -} - static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { @@ -581,26 +563,6 @@ static inline void mmu_notifier_range_init_owner( __young; \ }) -/* - * set_pte_at_notify() sets the pte _after_ running the notifier. - * This is safe to start by updating the secondary MMUs, because the primary MMU - * pte invalidate must have already happened with a ptep_clear_flush() before - * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is - * required when we change both the protection of the mapping from read-only to - * read-write and the pfn (like during copy on write page faults). 
Otherwise the - * old page would remain mapped readonly in the secondary MMUs after the new - * page is already writable by some CPU through the primary MMU. - */ -#define set_pte_at_notify(__mm, __address, __ptep, __pte) \ -({ \ - struct mm_struct *___mm = __mm; \ - unsigned long ___address = __address; \ - pte_t ___pte = __pte; \ - \ - mmu_notifier_change_pte(___mm, ___address, ___pte); \ - set_pte_at(___mm, ___address, __ptep, ___pte); \ -}) - #else /* CONFIG_MMU_NOTIFIER */ struct mmu_notifier_range { @@ -650,11 +612,6 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm, return 0; } -static inline void mmu_notifier_change_pte(struct mm_struct *mm, - unsigned long address, pte_t pte) -{ -} - static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { @@ -693,7 +650,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush -#define set_pte_at_notify set_pte_at static inline void mmu_notifier_synchronize(void) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a497f189d988..8f9c9590a42c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -76,9 +76,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES]; #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) # define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA) +# define is_migrate_cma_folio(folio, pfn) (MIGRATE_CMA == \ + get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK)) #else # define is_migrate_cma(migratetype) false # define is_migrate_cma_page(_page) false +# define is_migrate_cma_folio(folio, pfn) false #endif static inline bool is_migrate_movable(int mt) @@ -202,7 +205,10 @@ enum node_stat_item { NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ - NR_SECONDARY_PAGETABLE, /* secondary pagetables, e.g. KVM pagetables */ + NR_SECONDARY_PAGETABLE, /* secondary pagetables, KVM & IOMMU */ +#ifdef CONFIG_IOMMU_SUPPORT + NR_IOMMU_PAGES, /* # of pages allocated by IOMMU */ +#endif #ifdef CONFIG_SWAP NR_SWAPCACHE, #endif @@ -464,7 +470,7 @@ enum { #define NR_BLOOM_FILTERS 2 struct lru_gen_mm_state { - /* set to max_seq after each iteration */ + /* synced with max_seq after each iteration */ unsigned long seq; /* where the current iteration continues after */ struct list_head *head; @@ -479,8 +485,8 @@ struct lru_gen_mm_state { struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; - /* unstable max_seq from lru_gen_folio */ - unsigned long max_seq; + /* max_seq from lru_gen_folio: can be out of date */ + unsigned long seq; /* the next address within an mm to scan */ unsigned long next_addr; /* to batch promoted pages */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index f458469c5ce5..4338b1b4ac44 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -690,6 +690,8 @@ struct x86_cpu_id { __u16 model; __u16 steppings; __u16 feature; /* bit index */ + /* Solely for kernel-internal use: DO NOT EXPORT to userspace! 
*/ + __u16 flags; kernel_ulong_t driver_data; }; @@ -960,4 +962,14 @@ struct vchiq_device_id { char name[32]; }; +/** + * struct coreboot_device_id - Identifies a coreboot table entry + * @tag: tag ID + * @driver_data: driver specific data + */ +struct coreboot_device_id { + __u32 tag; + kernel_ulong_t driver_data; +}; + #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/module.h b/include/linux/module.h index 96bc462872c0..ffa1c603163c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -605,6 +605,11 @@ static inline bool module_is_live(struct module *mod) return mod->state != MODULE_STATE_GOING; } +static inline bool module_is_coming(struct module *mod) +{ + return mod->state == MODULE_STATE_COMING; +} + struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); @@ -857,6 +862,10 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) return ptr; } +static inline bool module_is_coming(struct module *mod) +{ + return false; +} #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS @@ -885,7 +894,7 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ -#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_MITIGATION_RETPOLINE extern bool retpoline_module_ok(bool has_retpoline); #else static inline bool retpoline_module_ok(bool has_retpoline) diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 001b2ce83832..e395461d59e5 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -25,13 +25,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, /* Additional bytes needed by arch in front of individual sections */ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); -/* Allocator used for allocating struct module, core sections and init - sections. Returns NULL on failure. */ -void *module_alloc(unsigned long size); - -/* Free memory returned from module_alloc. */ -void module_memfree(void *module_region); - /* Determines if the section name is an init section (that is only used during * module loading). */ @@ -115,18 +108,18 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); +#ifdef CONFIG_MODULES +void flush_module_init_free_work(void); +#else +static inline void flush_module_init_free_work(void) +{ +} +#endif + /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ - !defined(CONFIG_KASAN_VMALLOC) -#include <linux/kasan.h> -#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) -#else -#define MODULE_ALIGN PAGE_SIZE -#endif - #endif diff --git a/include/linux/msi.h b/include/linux/msi.h index ddace8c34dcf..dc27cf3903d5 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -412,6 +412,7 @@ bool arch_restore_msi_irqs(struct pci_dev *dev); struct irq_domain; struct irq_domain_ops; struct irq_chip; +struct irq_fwspec; struct device_node; struct fwnode_handle; struct msi_domain_info; @@ -431,6 +432,8 @@ struct msi_domain_info; * function. * @msi_post_free: Optional function which is invoked after freeing * all interrupts. + * @msi_translate: Optional translate callback to support the odd wire to + * MSI bridges, e.g. 
MBIGEN * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. @@ -468,6 +471,8 @@ struct msi_domain_ops { struct device *dev); void (*msi_post_free)(struct irq_domain *domain, struct device *dev); + int (*msi_translate)(struct irq_domain *domain, struct irq_fwspec *fwspec, + irq_hw_number_t *hwirq, unsigned int *type); }; /** @@ -547,6 +552,10 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 6), + /* Use dev->fwnode for MSI device domain creation */ + MSI_FLAG_USE_DEV_FWNODE = (1 << 7), + /* Set parent->dev into domain->pm_dev on device domain creation */ + MSI_FLAG_PARENT_PM_DEV = (1 << 8), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), @@ -564,14 +573,17 @@ enum { MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), - /* Support for PCI/IMS */ - MSI_FLAG_PCI_IMS = (1 << 21), }; /** * struct msi_parent_ops - MSI parent domain callbacks and configuration info * * @supported_flags: Required: The supported MSI flags of the parent domain + * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @bus_select_token: Optional: The bus token of the real parent domain for + * irq_domain::select() + * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for + * irq_domain::select() * @prefix: Optional: Prefix for the domain and chip name * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent * domain specific domain flags, domain ops and interrupt chip @@ -579,6 +591,9 @@ enum { */ struct msi_parent_ops { u32 supported_flags; + u32 required_flags; + u32 bus_select_token; + u32 bus_select_mask; const char *prefix; bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, struct irq_domain *msi_parent_domain, @@ -627,9 +642,6 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, - irq_write_msi_msg_t write_msi_msg); -void platform_msi_domain_free_irqs(struct device *dev); /* When an MSI domain is used as an intermediate domain */ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, @@ -656,8 +668,18 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); +/* Per device platform MSI */ +int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg); +void platform_device_msi_free_irqs_all(struct device *dev); bool msi_device_has_isolated_msi(struct device *dev); + +static inline int msi_domain_alloc_irqs(struct device *dev, unsigned int domid, int nirqs) +{ + return msi_domain_alloc_irqs_range(dev, domid, 0, nirqs - 1); +} + #else /* CONFIG_GENERIC_MSI_IRQ */ static inline bool msi_device_has_isolated_msi(struct device *dev) { diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 391087ad99b1..5ae72d1912c4 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -15,7 +15,6 @@ struct device; */ enum msi_domain_ids { MSI_DEFAULT_DOMAIN, - MSI_SECONDARY_DOMAIN, 
MSI_MAX_DEVICE_IRQDOMAINS, }; diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index c04f690871ca..9798c1a1d3b6 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -13,6 +13,7 @@ */ #include <linux/sched.h> #include <linux/mutex.h> +#include <linux/wait.h> typedef enum { FL_READY, diff --git a/include/linux/mtd/lpc32xx_mlc.h b/include/linux/mtd/lpc32xx_mlc.h index d168c628c0d5..35e971be0950 100644 --- a/include/linux/mtd/lpc32xx_mlc.h +++ b/include/linux/mtd/lpc32xx_mlc.h @@ -11,7 +11,7 @@ #include <linux/dmaengine.h> struct lpc32xx_mlc_platform_data { - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; }; #endif /* __LINUX_MTD_LPC32XX_MLC_H */ diff --git a/include/linux/mtd/lpc32xx_slc.h b/include/linux/mtd/lpc32xx_slc.h index cf54a9f80460..a044b806566b 100644 --- a/include/linux/mtd/lpc32xx_slc.h +++ b/include/linux/mtd/lpc32xx_slc.h @@ -11,7 +11,7 @@ #include <linux/dmaengine.h> struct lpc32xx_slc_platform_data { - bool (*dma_filter)(struct dma_chan *chan, void *filter_param); + dma_filter_fn dma_filter; }; #endif /* __LINUX_MTD_LPC32XX_SLC_H */ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 914a9f974baa..8d10d9d2e830 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -223,7 +223,7 @@ struct mtd_part { * @partitions_lock: lock protecting accesses to the partition list. Protects * not only the master partition list, but also all * sub-partitions. - * @suspended: et to 1 when the device is suspended, 0 otherwise + * @suspended: set to 1 when the device is suspended, 0 otherwise * * This struct is embedded in mtd_info and contains master-specific * properties/fields. The master is the root MTD device from the MTD partition diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index badb4c1ac079..5c19ead60499 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -169,7 +169,7 @@ struct spinand_op; struct spinand_device; -#define SPINAND_MAX_ID_LEN 4 +#define SPINAND_MAX_ID_LEN 5 /* * For erase, write and read operation, we got the following timings : * tBERS (erase) 1ms to 4ms diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index a529347fd75b..562f92504f2b 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -192,6 +192,7 @@ struct ubi_device_info { * or a volume was removed) * @UBI_VOLUME_RESIZED: a volume has been re-sized * @UBI_VOLUME_RENAMED: a volume has been re-named + * @UBI_VOLUME_SHUTDOWN: a volume is going to be removed, shut down users * @UBI_VOLUME_UPDATED: data has been written to a volume * * These constants define which type of event has happened when a volume @@ -202,6 +203,7 @@ enum { UBI_VOLUME_REMOVED, UBI_VOLUME_RESIZED, UBI_VOLUME_RENAMED, + UBI_VOLUME_SHUTDOWN, UBI_VOLUME_UPDATED, }; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 7e208d46ba5b..a561c629d89f 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -22,6 +22,8 @@ #include <linux/cleanup.h> #include <linux/mutex_types.h> +struct device; + #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ , .dep_map = { \ @@ -32,11 +34,9 @@ # define __DEP_MAP_MUTEX_INITIALIZER(lockname) #endif -#ifndef CONFIG_PREEMPT_RT - #ifdef CONFIG_DEBUG_MUTEXES -#define __DEBUG_MUTEX_INITIALIZER(lockname) \ +# define __DEBUG_MUTEX_INITIALIZER(lockname) \ , .magic = &lockname extern void mutex_destroy(struct mutex *lock); @@ -49,6 +49,7 @@ static inline void
mutex_destroy(struct mutex *lock) {} #endif +#ifndef CONFIG_PREEMPT_RT /** * mutex_init - initialize the mutex * @mutex: the mutex to be initialized @@ -101,9 +102,6 @@ extern bool mutex_is_locked(struct mutex *lock); extern void __mutex_rt_init(struct mutex *lock, const char *name, struct lock_class_key *key); -extern int mutex_trylock(struct mutex *lock); - -static inline void mutex_destroy(struct mutex *lock) { } #define mutex_is_locked(l) rt_mutex_base_is_locked(&(l)->rtmutex) @@ -121,6 +119,31 @@ do { \ } while (0) #endif /* CONFIG_PREEMPT_RT */ +#ifdef CONFIG_DEBUG_MUTEXES + +int __devm_mutex_init(struct device *dev, struct mutex *lock); + +#else + +static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) +{ + /* + * When CONFIG_DEBUG_MUTEXES is off, mutex_destroy() is just a no-op, + * so there is no real need to register it in the devm subsystem. + */ + return 0; +} + +#endif + +#define devm_mutex_init(dev, mutex) \ +({ \ + typeof(mutex) mutex_ = (mutex); \ + \ + mutex_init(mutex_); \ + __devm_mutex_init(dev, mutex_); \ +}) + /* * See kernel/locking/mutex.c for detailed documentation of these APIs. * Also see Documentation/locking/mutex-design.rst. diff --git a/include/linux/namei.h b/include/linux/namei.h index 74e0cc14ebf8..967aa9ea9f96 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ #define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ +#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) diff --git a/include/linux/net.h b/include/linux/net.h index c9b4a63791a4..688320b79fcc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -153,6 +153,7 @@ struct sockaddr; struct msghdr; struct module; struct sk_buff; +struct proto_accept_arg; typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, unsigned int, size_t); typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); @@ -171,7 +172,8 @@ struct proto_ops { int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, - struct socket *newsock, int flags, bool kern); + struct socket *newsock, + struct proto_accept_arg *arg); int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); @@ -299,10 +301,7 @@ do { \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else #define net_dbg_ratelimited(fmt, ...)
\ - do { \ - if (0) \ - no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ - } while (0) + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) #endif #define net_get_random_once(buf, nbytes) \ diff --git a/include/linux/net/intel/libie/rx.h b/include/linux/net/intel/libie/rx.h new file mode 100644 index 000000000000..8e97775f1d66 --- /dev/null +++ b/include/linux/net/intel/libie/rx.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBIE_RX_H +#define __LIBIE_RX_H + +#include <net/libeth/rx.h> + +/* Rx buffer management */ + +/* The largest size for a single descriptor as per HW */ +#define LIBIE_MAX_RX_BUF_LEN 9728U +/* "True" HW-writeable space: minimum from SW and HW values */ +#define LIBIE_RX_BUF_LEN(hr) min_t(u32, LIBETH_RX_PAGE_LEN(hr), \ + LIBIE_MAX_RX_BUF_LEN) + +/* The maximum frame size as per HW (S/G) */ +#define __LIBIE_MAX_RX_FRM_LEN 16382U +/* ATST, HW can chain up to 5 Rx descriptors */ +#define LIBIE_MAX_RX_FRM_LEN(hr) \ + min_t(u32, __LIBIE_MAX_RX_FRM_LEN, LIBIE_RX_BUF_LEN(hr) * 5) +/* Maximum frame size minus LL overhead */ +#define LIBIE_MAX_MTU \ + (LIBIE_MAX_RX_FRM_LEN(LIBETH_MAX_HEADROOM) - LIBETH_RX_LL_LEN) + +/* O(1) converting i40e/ice/iavf's 8/10-bit hardware packet type to a parsed + * bitfield struct. + */ + +#define LIBIE_RX_PT_NUM 154 + +extern const struct libeth_rx_pt libie_rx_pt_lut[LIBIE_RX_PT_NUM]; + +/** + * libie_rx_pt_parse - convert HW packet type to software bitfield structure + * @pt: 10-bit hardware packet type value from the descriptor + * + * ```libie_rx_pt_lut``` must be accessed only using this wrapper. + * + * Return: parsed bitfield struct corresponding to the provided ptype. + */ +static inline struct libeth_rx_pt libie_rx_pt_parse(u32 pt) +{ + if (unlikely(pt >= LIBIE_RX_PT_NUM)) + pt = 0; + + return libie_rx_pt_lut[pt]; +} + +#endif /* __LIBIE_RX_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 78a09af89e39..d20c6c99eb88 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -59,7 +59,7 @@ struct ethtool_ops; struct kernel_hwtstamp_config; struct phy_device; struct dsa_port; -struct ip_tunnel_parm; +struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; struct netdev_name_node; @@ -225,12 +225,6 @@ struct net_device_core_stats { #include <linux/cache.h> #include <linux/skbuff.h> -#ifdef CONFIG_RPS -#include <linux/static_key.h> -extern struct static_key_false rps_needed; -extern struct static_key_false rfs_needed; -#endif - struct neighbour; struct neigh_parms; struct sk_buff; @@ -730,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node #endif } -#ifdef CONFIG_RPS -/* - * This structure holds an RPS map which can be of variable length. The - * map is an array of CPUs. - */ -struct rps_map { - unsigned int len; - struct rcu_head rcu; - u16 cpus[]; -}; -#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) - -/* - * The rps_dev_flow structure contains the mapping of a flow to a CPU, the - * tail pointer for that CPU's input queue at the time of last enqueue, and - * a hardware filter index. - */ -struct rps_dev_flow { - u16 cpu; - u16 filter; - unsigned int last_qtail; -}; -#define RPS_NO_FILTER 0xffff - -/* - * The rps_dev_flow_table structure contains a table of flow mappings. 
- */ -struct rps_dev_flow_table { - unsigned int mask; - struct rcu_head rcu; - struct rps_dev_flow flows[]; -}; -#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ - ((_num) * sizeof(struct rps_dev_flow))) - -/* - * The rps_sock_flow_table contains mappings of flows to the last CPU - * on which they were processed by the application (set in recvmsg). - * Each entry is a 32bit value. Upper part is the high-order bits - * of flow hash, lower part is CPU number. - * rps_cpu_mask is used to partition the space, depending on number of - * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 - * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, - * meaning we use 32-6=26 bits for the hash. - */ -struct rps_sock_flow_table { - u32 mask; - - u32 ents[] ____cacheline_aligned_in_smp; -}; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) - -#define RPS_NO_CPU 0xffff - -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - -static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) { - unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; - - /* We only give a hint, preemption can change CPU under us */ - val |= raw_smp_processor_id(); - - /* The following WRITE_ONCE() is paired with the READ_ONCE() - * here, and another one in get_rps_cpu(). - */ - if (READ_ONCE(table->ents[index]) != val) - WRITE_ONCE(table->ents[index], val); - } -} - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif -#endif /* CONFIG_RPS */ /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { @@ -1060,7 +978,7 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); - void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); + void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); @@ -1409,7 +1327,7 @@ struct netdev_net_notifier { * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. - * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, + * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm_kern *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. 
* struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); @@ -1665,7 +1583,8 @@ struct net_device_ops { int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); int (*ndo_tunnel_ctl)(struct net_device *dev, - struct ip_tunnel_parm *p, int cmd); + struct ip_tunnel_parm_kern *p, + int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); @@ -1813,6 +1732,15 @@ enum netdev_stat_type { NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ }; +enum netdev_reg_state { + NETREG_UNINITIALIZED = 0, + NETREG_REGISTERED, /* completed register_netdevice */ + NETREG_UNREGISTERING, /* called unregister_netdevice */ + NETREG_UNREGISTERED, /* completed unregister todo */ + NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ +}; + /** * struct net_device - The DEVICE structure. * @@ -2028,6 +1956,8 @@ enum netdev_stat_type { * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops + * @stat_ops: Optional ops for queue-aware statistics + * @queue_mgmt_ops: Optional ops for queue management * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size @@ -2144,6 +2074,7 @@ struct net_device { struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; }; + unsigned long state; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; @@ -2189,7 +2120,6 @@ struct net_device { * part of the usual set specified in Space.c. */ - unsigned long state; struct list_head dev_list; struct list_head napi_list; @@ -2252,7 +2182,7 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - unsigned char operstate; + unsigned int operstate; unsigned char link_mode; unsigned char if_port; @@ -2375,13 +2305,7 @@ struct net_device { struct list_head link_watch_list; - enum { NETREG_UNINITIALIZED=0, - NETREG_REGISTERED, /* completed register_netdevice */ - NETREG_UNREGISTERING, /* called unregister_netdevice */ - NETREG_UNREGISTERED, /* completed unregister todo */ - NETREG_RELEASED, /* called free_netdev */ - NETREG_DUMMY, /* dummy device for NAPI poll */ - } reg_state:8; + u8 reg_state; bool dismantle; @@ -2414,6 +2338,10 @@ struct net_device { const struct rtnl_link_ops *rtnl_link_ops; + const struct netdev_stat_ops *stat_ops; + + const struct netdev_queue_mgmt_ops *queue_mgmt_ops; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u @@ -2443,8 +2371,8 @@ struct net_device { struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; + bool threaded; unsigned wol_enabled:1; - unsigned threaded:1; struct list_head net_notifier_list; @@ -3072,8 +3000,6 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); -extern rwlock_t dev_base_lock; /* Device list lock */ - #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ @@ -3196,7 +3122,7 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); void netdev_freemem(struct net_device *dev); -int init_dummy_netdev(struct net_device *dev); +void init_dummy_netdev(struct 
net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, @@ -3211,6 +3137,7 @@ struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); struct net_device *dev_get_by_napi_id(unsigned int napi_id); +void netdev_copy_name(struct net_device *dev, char *name); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, @@ -3281,6 +3208,7 @@ struct softnet_data { struct softnet_data *rps_ipi_list; #endif + unsigned int received_rps; bool in_net_rx_action; bool in_napi_threaded_poll; @@ -3313,11 +3241,11 @@ struct softnet_data { unsigned int cpu; unsigned int input_queue_tail; #endif - unsigned int received_rps; - unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; + atomic_t dropped ____cacheline_aligned_in_smp; + /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; int defer_count; @@ -3326,21 +3254,6 @@ struct softnet_data { call_single_data_t defer_csd; }; -static inline void input_queue_head_incr(struct softnet_data *sd) -{ -#ifdef CONFIG_RPS - sd->input_queue_head++; -#endif -} - -static inline void input_queue_tail_incr_save(struct softnet_data *sd, - unsigned int *qtail) -{ -#ifdef CONFIG_RPS - *qtail = ++sd->input_queue_tail; -#endif -} - DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); static inline int dev_recursion_level(void) @@ -3348,23 +3261,6 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 8 -static inline bool dev_xmit_recursion(void) -{ - return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > - XMIT_RECURSION_LIMIT); -} - -static inline void dev_xmit_recursion_inc(void) -{ - __this_cpu_inc(softnet_data.xmit.recursion); -} - -static inline void dev_xmit_recursion_dec(void) -{ - __this_cpu_dec(softnet_data.xmit.recursion); -} - void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -3966,7 +3862,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); @@ -3979,8 +3875,6 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); -struct packet_offload *gro_find_receive_by_type(__be16 type); -struct packet_offload *gro_find_complete_by_type(__be16 type); static inline void napi_free_frags(struct napi_struct *napi) { @@ -4207,6 +4101,8 @@ static inline void dev_put(struct net_device *dev) netdev_put(dev, NULL); } +DEFINE_FREE(dev_put, struct net_device *, if (_T) dev_put(_T)) + static inline void netdev_ref_replace(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, @@ -4350,8 +4246,10 @@ static inline bool netif_testing(const struct net_device *dev) */ static inline bool netif_oper_up(const struct net_device *dev) { - return (dev->operstate == IF_OPER_UP || - dev->operstate == 
IF_OPER_UNKNOWN /* backward compat */); + unsigned int operstate = READ_ONCE(dev->operstate); + + return operstate == IF_OPER_UP || + operstate == IF_OPER_UNKNOWN /* backward compat */; } /** @@ -4623,6 +4521,9 @@ static inline void netif_addr_unlock_bh(struct net_device *dev) void ether_setup(struct net_device *dev); +/* Allocate dummy net_device */ +struct net_device *alloc_netdev_dummy(int sizeof_priv); + /* Support for loadable net-drivers */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, @@ -4790,11 +4691,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); -extern int netdev_max_backlog; -extern int dev_rx_weight; -extern int dev_tx_weight; -extern int gro_normal_batch; - enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, @@ -5251,7 +5147,9 @@ static inline const char *netdev_name(const struct net_device *dev) static inline const char *netdev_reg_state(const struct net_device *dev) { - switch (dev->reg_state) { + u8 reg_state = READ_ONCE(dev->reg_state); + + switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; @@ -5260,7 +5158,7 @@ static inline const char *netdev_reg_state(const struct net_device *dev) case NETREG_DUMMY: return " (dummy)"; } - WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, dev->reg_state); + WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } @@ -5302,7 +5200,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev) #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) -extern struct list_head ptype_all __read_mostly; extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ce660d51549b..2683b2b77612 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -370,7 +370,6 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); -int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry); #include <net/flow.h> diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 100cbb261269..d2d291a9cdad 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -20,95 +20,24 @@ #include <linux/uio.h> enum netfs_sreq_ref_trace; - -/* - * Overload PG_private_2 to give us PG_fscache - this is used to indicate that - * a page is currently backed by a local disk cache - */ -#define folio_test_fscache(folio) folio_test_private_2(folio) -#define PageFsCache(page) PagePrivate2((page)) -#define SetPageFsCache(page) SetPagePrivate2((page)) -#define ClearPageFsCache(page) ClearPagePrivate2((page)) -#define TestSetPageFsCache(page) TestSetPagePrivate2((page)) -#define TestClearPageFsCache(page) TestClearPagePrivate2((page)) +typedef struct mempool_s mempool_t; /** - * folio_start_fscache - Start an fscache write on a folio. + * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] * @folio: The folio. * * Call this function before writing a folio to a local cache. Starting a * second write before the first one finishes is not allowed. 
+ * + * Note that this should no longer be used. */ -static inline void folio_start_fscache(struct folio *folio) +static inline void folio_start_private_2(struct folio *folio) { VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio); folio_get(folio); folio_set_private_2(folio); } -/** - * folio_end_fscache - End an fscache write on a folio. - * @folio: The folio. - * - * Call this function after the folio has been written to the local cache. - * This will wake any sleepers waiting on this folio. - */ -static inline void folio_end_fscache(struct folio *folio) -{ - folio_end_private_2(folio); -} - -/** - * folio_wait_fscache - Wait for an fscache write on this folio to end. - * @folio: The folio. - * - * If this folio is currently being written to a local cache, wait for - * the write to finish. Another write may start after this one finishes, - * unless the caller holds the folio lock. - */ -static inline void folio_wait_fscache(struct folio *folio) -{ - folio_wait_private_2(folio); -} - -/** - * folio_wait_fscache_killable - Wait for an fscache write on this folio to end. - * @folio: The folio. - * - * If this folio is currently being written to a local cache, wait - * for the write to finish or for a fatal signal to be received. - * Another write may start after this one finishes, unless the caller - * holds the folio lock. - * - * Return: - * - 0 if successful. - * - -EINTR if a fatal signal was encountered. - */ -static inline int folio_wait_fscache_killable(struct folio *folio) -{ - return folio_wait_private_2_killable(folio); -} - -static inline void set_page_fscache(struct page *page) -{ - folio_start_fscache(page_folio(page)); -} - -static inline void end_page_fscache(struct page *page) -{ - folio_end_private_2(page_folio(page)); -} - -static inline void wait_on_page_fscache(struct page *page) -{ - folio_wait_private_2(page_folio(page)); -} - -static inline int wait_on_page_fscache_killable(struct page *page) -{ - return folio_wait_private_2_killable(page_folio(page)); -} - /* Marks used on xarray-based buffers */ #define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ #define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ @@ -135,6 +64,7 @@ struct netfs_inode { #if IS_ENABLED(CONFIG_FSCACHE) struct fscache_cookie *cache; #endif + struct mutex wb_lock; /* Writeback serialisation */ loff_t remote_i_size; /* Size of the remote file */ loff_t zero_point; /* Size after which we assume there's no data * on the server */ @@ -142,7 +72,8 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */ +#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark + * write to cache on read */ }; /* @@ -165,16 +96,25 @@ struct netfs_folio { unsigned int dirty_len; /* Write-streaming dirty data length */ }; #define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. 
*/ +#define NETFS_FOLIO_COPY_TO_CACHE ((struct netfs_group *)0x356UL) /* Write to the cache only */ -static inline struct netfs_folio *netfs_folio_info(struct folio *folio) +static inline bool netfs_is_folio_info(const void *priv) { - void *priv = folio_get_private(folio); + return (unsigned long)priv & NETFS_FOLIO_INFO; +} - if ((unsigned long)priv & NETFS_FOLIO_INFO) +static inline struct netfs_folio *__netfs_folio_info(const void *priv) +{ + if (netfs_is_folio_info(priv)) return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); return NULL; } +static inline struct netfs_folio *netfs_folio_info(struct folio *folio) +{ + return __netfs_folio_info(folio_get_private(folio)); +} + static inline struct netfs_group *netfs_folio_group(struct folio *folio) { struct netfs_folio *finfo; @@ -187,6 +127,33 @@ static inline struct netfs_group *netfs_folio_group(struct folio *folio) } /* + * Stream of I/O subrequests going to a particular destination, such as the + * server or the local cache. This is mainly intended for writing where we may + * have to write to multiple destinations concurrently. + */ +struct netfs_io_stream { + /* Submission tracking */ + struct netfs_io_subrequest *construct; /* Op being constructed */ + unsigned int submit_off; /* Folio offset we're submitting from */ + unsigned int submit_len; /* Amount of data left to submit */ + unsigned int submit_max_len; /* Amount I/O can be rounded up to */ + void (*prepare_write)(struct netfs_io_subrequest *subreq); + void (*issue_write)(struct netfs_io_subrequest *subreq); + /* Collection tracking */ + struct list_head subrequests; /* Contributory I/O operations */ + struct netfs_io_subrequest *front; /* Op being collected */ + unsigned long long collected_to; /* Position we've collected results to */ + size_t transferred; /* The amount transferred from this stream */ + enum netfs_io_source source; /* Where to read from/write to */ + unsigned short error; /* Aggregate error for the stream */ + unsigned char stream_nr; /* Index of stream in parent table */ + bool avail; /* T if stream is available */ + bool active; /* T if stream is active */ + bool need_retry; /* T if this stream needs retrying */ + bool failed; /* T if this stream failed */ +}; + +/* * Resources required to do operations on a cache. 
*/ struct netfs_cache_resources { @@ -209,14 +176,17 @@ struct netfs_io_subrequest { struct work_struct work; struct list_head rreq_link; /* Link in rreq->subrequests */ struct iov_iter io_iter; /* Iterator for this subrequest */ - loff_t start; /* Where to start the I/O */ + unsigned long long start; /* Where to start the I/O */ + size_t max_len; /* Maximum size of the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ + unsigned int nr_segs; /* Number of segs in io_iter */ unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */ enum netfs_io_source source; /* Where to read from/write to */ + unsigned char stream_nr; /* I/O stream this belongs to */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ @@ -224,15 +194,20 @@ struct netfs_io_subrequest { #define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ +#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ +#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */ +#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */ +#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */ +#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */ }; enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ - NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_DIO_WRITE, /* This is a direct I/O write */ @@ -254,26 +229,36 @@ struct netfs_io_request { struct netfs_cache_resources cache_resources; struct list_head proc_link; /* Link in netfs_iorequests */ struct list_head subrequests; /* Contributory I/O operations */ + struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ +#define NR_IO_STREAMS 2 //wreq->nr_io_streams + struct netfs_group *group; /* Writeback group being written back */ struct iov_iter iter; /* Unencrypted-side iterator */ struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ void *netfs_priv; /* Private data for the netfs */ + void *netfs_priv2; /* Private data for the netfs */ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ unsigned int debug_id; unsigned int rsize; /* Maximum read size (0 for none) */ unsigned int wsize; /* Maximum write size (0 for none) */ - unsigned int subreq_counter; /* Next subreq->debug_index */ + atomic_t subreq_counter; /* Next subreq->debug_index */ + unsigned int nr_group_rel; /* Number of refs to release on ->group */ + spinlock_t lock; /* Lock for queuing subreqs */ atomic_t nr_outstanding; /* Number of ops in progress */ atomic_t 
nr_copy_ops; /* Number of copy-to-cache ops in progress */ - size_t submitted; /* Amount submitted for I/O so far */ - size_t len; /* Length of the request */ size_t upper_len; /* Length can be extended to here */ + unsigned long long submitted; /* Amount submitted for I/O so far */ + unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ short error; /* 0 or error that occurred */ enum netfs_io_origin origin; /* Origin of the request */ bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ - loff_t i_size; /* Size of the file */ - loff_t start; /* Start position */ + unsigned long long i_size; /* Size of the file */ + unsigned long long start; /* Start position */ + atomic64_t issued_to; /* Write issuer folio cursor */ + unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */ + unsigned long long collected_to; /* Point we've collected to */ + unsigned long long cleaned_to; /* Position we've cleaned folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ refcount_t ref; unsigned long flags; @@ -287,6 +272,11 @@ struct netfs_io_request { #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ +#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ +#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ +#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark + * write to cache on read */ const struct netfs_request_ops *netfs_ops; void (*cleanup)(struct netfs_io_request *req); }; @@ -295,8 +285,8 @@ struct netfs_io_request { * Operations the network filesystem can/must provide to the helpers. */ struct netfs_request_ops { - unsigned int io_request_size; /* Alloc size for netfs_io_request struct */ - unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */ + mempool_t *request_pool; + mempool_t *subrequest_pool; int (*init_request)(struct netfs_io_request *rreq, struct file *file); void (*free_request)(struct netfs_io_request *rreq); void (*free_subrequest)(struct netfs_io_subrequest *rreq); @@ -312,10 +302,13 @@ struct netfs_request_ops { /* Modification handling */ void (*update_i_size)(struct inode *inode, loff_t i_size); + void (*post_modify)(struct inode *inode); /* Write request handling */ - void (*create_write_requests)(struct netfs_io_request *wreq, - loff_t start, size_t len); + void (*begin_writeback)(struct netfs_io_request *wreq); + void (*prepare_write)(struct netfs_io_subrequest *subreq); + void (*issue_write)(struct netfs_io_subrequest *subreq); + void (*retry_request)(struct netfs_io_request *wreq, struct netfs_io_stream *stream); void (*invalidate_cache)(struct netfs_io_request *wreq); }; @@ -350,15 +343,27 @@ struct netfs_cache_ops { netfs_io_terminated_t term_func, void *term_func_priv); + /* Write data to the cache from a netfs subrequest. */ + void (*issue_write)(struct netfs_io_subrequest *subreq); + /* Expand readahead request */ void (*expand_readahead)(struct netfs_cache_resources *cres, - loff_t *_start, size_t *_len, loff_t i_size); + unsigned long long *_start, + unsigned long long *_len, + unsigned long long i_size); /* Prepare a read operation, shortening it to a cached/uncached * boundary as appropriate. 
*/ enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq, - loff_t i_size); + unsigned long long i_size); + + /* Prepare a write subrequest, working out if we're allowed to do it + * and finding out the maximum amount of data to gather before + * attempting to submit. If we're not permitted to do it, the + * subrequest should be marked failed. + */ + void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq); /* Prepare a write operation, working out what part of the write we can * actually do. @@ -384,6 +389,7 @@ struct netfs_cache_ops { }; /* High-level read API. */ +ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter); ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter); ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); @@ -394,6 +400,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from, struct netfs_group *netfs_group); ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from); +ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter, + struct netfs_group *netfs_group); ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); /* Address operations API */ @@ -410,7 +418,6 @@ int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc); void netfs_clear_inode_writeback(struct inode *inode, const void *aux); void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool netfs_release_folio(struct folio *folio, gfp_t gfp); -int netfs_launder_folio(struct folio *folio); /* VMA operations API. 
*/ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); @@ -426,9 +433,7 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, iov_iter_extraction_t extraction_flags); size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); -struct netfs_io_subrequest *netfs_create_write_request( - struct netfs_io_request *wreq, enum netfs_io_source dest, - loff_t start, size_t len, work_func_t worker); +void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, bool was_async); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); @@ -472,6 +477,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx, #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif + mutex_init(&ctx->wb_lock); /* ->releasepage() drives zero_point */ if (use_zero_point) { ctx->zero_point = ctx->remote_i_size; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1a4445bf2ab9..5df7340d4dab 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -291,6 +291,7 @@ struct netlink_callback { u16 answer_flags; u32 min_dump_alloc; unsigned int prev_seq, seq; + int flags; bool strict_check; union { u8 ctx[48]; @@ -323,6 +324,7 @@ struct netlink_dump_control { void *data; struct module *module; u32 min_dump_alloc; + int flags; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index ef8d2d618d5b..0d896ce296ce 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -701,6 +701,12 @@ enum state_protect_how4 { SP4_SSV = 2 }; +/* GET_DIR_DELEGATION non-fatal status codes */ +enum gddrnf4_status { + GDD4_OK = 0, + GDD4_UNAVAIL = 1 +}; + enum pnfs_layouttype { LAYOUT_NFSV4_1_FILES = 1, LAYOUT_OSD2_OBJECTS = 2, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f5ce7b101146..039898d70954 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -561,6 +561,9 @@ extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openfl extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block); +extern int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned int open_flags, + umode_t mode); /* * linux/fs/nfs/symlink.c @@ -611,6 +614,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +void nfs_commit_begin(struct nfs_mds_commit_info *cinfo); bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline bool nfs_have_writebacks(const struct inode *inode) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 539b57fbf3ce..d09b9773b20c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1821,13 +1821,6 @@ struct nfs_rpc_ops { }; /* - * NFS_CALL(getattr, inode, (fattr)); - * into - * NFS_PROTO(inode)->getattr(fattr); - */ -#define NFS_CALL(op, inode, args) NFS_PROTO(inode)->op args - -/* * Function vectors etc. 
for the NFS client */ extern const struct nfs_rpc_ops nfs_v2_clientops; diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e92e378df000..a8dfb38c9bb6 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_cleanup(void); +extern void hardlockup_config_perf_event(const char *str); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } +static inline void hardlockup_config_perf_event(const char *str) { } #endif void watchdog_hardlockup_stop(void); @@ -216,13 +218,6 @@ void watchdog_update_hrtimer_threshold(u64 period); static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -struct ctl_table; -int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); -int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); - #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include <asm/nmi.h> #endif diff --git a/include/linux/node.h b/include/linux/node.h index 25b66d705ee2..dfc004e4bee7 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -34,6 +34,18 @@ struct access_coordinate { unsigned int write_latency; }; +/* + * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0 + * - access_coordinate between target node and nearest initiator node + * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1 + * - access_coordinate between target node and nearest CPU node + */ +enum access_coordinate_class { + ACCESS_COORDINATE_LOCAL, + ACCESS_COORDINATE_CPU, + ACCESS_COORDINATE_MAX +}; + enum cache_indexing { NODE_CACHE_DIRECT_MAP, NODE_CACHE_INDEXED, @@ -66,7 +78,7 @@ struct node_cache_attrs { #ifdef CONFIG_HMEM_REPORTING void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, - unsigned access); + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -75,7 +87,7 @@ static inline void node_add_cache(unsigned int nid, static inline void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, - unsigned access) + enum access_coordinate_class access) { } #endif @@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, - unsigned access); + enum access_coordinate_class access); #else static inline void node_dev_init(void) { diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 0f1d024bd958..7d22ea50b098 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -7,7 +7,7 @@ struct proc_ns_operations; struct ns_common { - atomic_long_t stashed; + struct dentry *stashed; const struct proc_ns_operations *ops; unsigned int inum; refcount_t count; diff --git a/include/linux/numa.h b/include/linux/numa.h index 915033a75731..1d43371fafd2 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -36,12 +36,7 
@@ int memory_add_physaddr_to_nid(u64 start); int phys_to_target_node(u64 start); #endif -#ifndef numa_fill_memblks -static inline int __init numa_fill_memblks(u64 start, u64 end) -{ - return NUMA_NO_MEMBLK; -} -#endif +int numa_fill_memblks(u64 start, u64 end); #else /* !CONFIG_NUMA */ static inline int numa_nearest_node(int node, unsigned int state) diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 4dd7e6fe92fb..eb2f04d636c8 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -6,7 +6,11 @@ #ifndef _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H -#define NVME_RDMA_MAX_QUEUE_SIZE 128 +#define NVME_RDMA_IP_PORT 4420 + +#define NVME_RDMA_MAX_QUEUE_SIZE 256 +#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128 +#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128 enum nvme_rdma_cm_fmt { NVME_RDMA_CM_FMT_1_0 = 0x0, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3ef4053ea950..425573202295 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -23,8 +23,6 @@ #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" -#define NVME_RDMA_IP_PORT 4420 - #define NVME_NSID_ALL 0xffffffff enum nvme_subsys_type { diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index f0ba0e03218f..3ebeaa0ded00 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -199,7 +199,10 @@ int nvmem_add_one_cell(struct nvmem_device *nvmem, int nvmem_layout_register(struct nvmem_layout *layout); void nvmem_layout_unregister(struct nvmem_layout *layout); -int nvmem_layout_driver_register(struct nvmem_layout_driver *drv); +#define nvmem_layout_driver_register(drv) \ + __nvmem_layout_driver_register(drv, THIS_MODULE) +int __nvmem_layout_driver_register(struct nvmem_layout_driver *drv, + struct module *owner); void nvmem_layout_driver_unregister(struct nvmem_layout_driver *drv); #define module_nvmem_layout_driver(__nvmem_layout_driver) \ module_driver(__nvmem_layout_driver, nvmem_layout_driver_register, \ diff --git a/include/linux/objpool.h b/include/linux/objpool.h index 15aff4a17f0c..cb1758eaa2d3 100644 --- a/include/linux/objpool.h +++ b/include/linux/objpool.h @@ -5,6 +5,10 @@ #include <linux/types.h> #include <linux/refcount.h> +#include <linux/atomic.h> +#include <linux/cpumask.h> +#include <linux/irqflags.h> +#include <linux/smp.h> /* * objpool: ring-array based lockless MPMC queue @@ -69,7 +73,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context); * struct objpool_head - object pooling metadata * @obj_size: object size, aligned to sizeof(void *) * @nr_objs: total objs (to be pre-allocated with objpool) - * @nr_cpus: local copy of nr_cpu_ids + * @nr_possible_cpus: cached value of num_possible_cpus() * @capacity: max objs can be managed by one objpool_slot * @gfp: gfp flags for kmalloc & vmalloc * @ref: refcount of objpool @@ -81,7 +85,7 @@ typedef int (*objpool_fini_cb)(struct objpool_head *head, void *context); struct objpool_head { int obj_size; int nr_objs; - int nr_cpus; + int nr_possible_cpus; int capacity; gfp_t gfp; refcount_t ref; @@ -118,13 +122,94 @@ int objpool_init(struct objpool_head *pool, int nr_objs, int object_size, gfp_t gfp, void *context, objpool_init_obj_cb objinit, objpool_fini_cb release); +/* try to retrieve object from slot */ +static inline void *__objpool_try_get_slot(struct objpool_head *pool, int cpu) +{ + struct objpool_slot *slot = pool->cpu_slots[cpu]; + /* load head snapshot, other cpus may change it */ + uint32_t head = smp_load_acquire(&slot->head); + + 
while (head != READ_ONCE(slot->last)) { + void *obj; + + /* + * data visibility of 'last' and 'head' could be out of + * order since memory updating of 'last' and 'head' are + * performed in push() and pop() independently + * + * before any retrieving attempts, pop() must guarantee + * 'last' is behind 'head', that is to say, there must + * be available objects in slot, which could be ensured + * by condition 'last != head && last - head <= nr_objs' + * that is equivalent to 'last - head - 1 < nr_objs' as + * 'last' and 'head' are both unsigned int32 + */ + if (READ_ONCE(slot->last) - head - 1 >= pool->nr_objs) { + head = READ_ONCE(slot->head); + continue; + } + + /* obj must be retrieved before moving forward head */ + obj = READ_ONCE(slot->entries[head & slot->mask]); + + /* move head forward to mark it's consumption */ + if (try_cmpxchg_release(&slot->head, &head, head + 1)) + return obj; + } + + return NULL; +} + /** * objpool_pop() - allocate an object from objpool * @pool: object pool * * return value: object ptr or NULL if failed */ -void *objpool_pop(struct objpool_head *pool); +static inline void *objpool_pop(struct objpool_head *pool) +{ + void *obj = NULL; + unsigned long flags; + int i, cpu; + + /* disable local irq to avoid preemption & interruption */ + raw_local_irq_save(flags); + + cpu = raw_smp_processor_id(); + for (i = 0; i < pool->nr_possible_cpus; i++) { + obj = __objpool_try_get_slot(pool, cpu); + if (obj) + break; + cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1); + } + raw_local_irq_restore(flags); + + return obj; +} + +/* adding object to slot, abort if the slot was already full */ +static inline int +__objpool_try_add_slot(void *obj, struct objpool_head *pool, int cpu) +{ + struct objpool_slot *slot = pool->cpu_slots[cpu]; + uint32_t head, tail; + + /* loading tail and head as a local snapshot, tail first */ + tail = READ_ONCE(slot->tail); + + do { + head = READ_ONCE(slot->head); + /* fault caught: something must be wrong */ + WARN_ON_ONCE(tail - head > pool->nr_objs); + } while (!try_cmpxchg_acquire(&slot->tail, &tail, tail + 1)); + + /* now the tail position is reserved for the given obj */ + WRITE_ONCE(slot->entries[tail & slot->mask], obj); + /* update sequence to make this obj available for pop() */ + smp_store_release(&slot->last, tail + 1); + + return 0; +} /** * objpool_push() - reclaim the object and return back to objpool @@ -134,7 +219,19 @@ void *objpool_pop(struct objpool_head *pool); * return: 0 or error code (it fails only when user tries to push * the same object multiple times or wrong "objects" into objpool) */ -int objpool_push(void *obj, struct objpool_head *pool); +static inline int objpool_push(void *obj, struct objpool_head *pool) +{ + unsigned long flags; + int rc; + + /* disable local irq to avoid preemption & interruption */ + raw_local_irq_save(flags); + rc = __objpool_try_add_slot(obj, pool, raw_smp_processor_id()); + raw_local_irq_restore(flags); + + return rc; +} + /** * objpool_drop() - discard the object and deref objpool diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 33212e93f4a6..b3b8d3dab52d 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -131,7 +131,7 @@ */ .macro VALIDATE_UNRET_BEGIN #if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) .Lhere_\@: .pushsection .discard.validate_unret .long .Lhere_\@ - . 
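The objpool hunks above move objpool_pop() and objpool_push() out of lib/objpool.c and into the header so the lockless per-CPU ring fast paths can be inlined at every call site. A minimal usage sketch follows, assuming only the API declared in this header; struct my_buf, my_buf_init(), and the pool size are hypothetical and purely illustrative:

	#include <linux/objpool.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	struct my_buf {				/* hypothetical pooled object */
		char data[64];
	};

	/* objinit callback: invoked once per pre-allocated object */
	static int my_buf_init(void *obj, void *context)
	{
		memset(obj, 0, sizeof(struct my_buf));
		return 0;
	}

	static int my_pool_setup(struct objpool_head *pool)
	{
		/* pre-allocate 128 objects, spread across per-CPU ring slots */
		return objpool_init(pool, 128, sizeof(struct my_buf),
				    GFP_KERNEL, NULL, my_buf_init, NULL);
	}

	static void my_pool_cycle(struct objpool_head *pool)
	{
		struct my_buf *buf = objpool_pop(pool);

		if (!buf)
			return;			/* pool temporarily empty */
		/* ... use buf ... */
		objpool_push(buf, pool);	/* hand back to this CPU's slot */
	}

As the inlined loop above shows, objpool_pop() tries the current CPU's slot first and only then walks the remaining possible CPUs, with local IRQs disabled around the retrieval to keep the head/tail cmpxchg sequences preemption-safe.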
diff --git a/include/linux/of.h b/include/linux/of.h index 6a9ddf20e79a..a0bedd038a05 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -13,6 +13,7 @@ */ #include <linux/types.h> #include <linux/bitops.h> +#include <linux/cleanup.h> #include <linux/errno.h> #include <linux/kobject.h> #include <linux/mod_devicetable.h> @@ -134,6 +135,7 @@ static inline struct device_node *of_node_get(struct device_node *node) } static inline void of_node_put(struct device_node *node) { } #endif /* !CONFIG_OF_DYNAMIC */ +DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T)) /* Pointer for first entry in chain of all nodes. */ extern struct device_node *of_root; @@ -180,11 +182,6 @@ static inline bool is_of_node(const struct fwnode_handle *fwnode) &__of_fwnode_handle_node->fwnode : NULL; \ }) -static inline bool of_have_populated_dt(void) -{ - return of_root != NULL; -} - static inline bool of_node_is_root(const struct device_node *node) { return node && (node->parent == NULL); @@ -294,6 +291,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_next_available_child( const struct device_node *node, struct device_node *prev); +extern struct device_node *of_get_next_reserved_child( + const struct device_node *node, struct device_node *prev); extern struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible); @@ -362,9 +361,6 @@ extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node, int index); extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread); -#define for_each_property_of_node(dn, pp) \ - for (pp = dn->properties; pp != NULL; pp = pp->next) - extern int of_n_addr_cells(struct device_node *np); extern int of_n_size_cells(struct device_node *np); extern const struct of_device_id *of_match_node( @@ -402,7 +398,20 @@ extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); -extern int of_machine_is_compatible(const char *compat); +bool of_machine_compatible_match(const char *const *compats); + +/** + * of_machine_is_compatible - Test root of device tree for a given compatible value + * @compat: compatible string to look for in root node's compatible property. + * + * Return: true if the root node has the given value in its compatible property. 
+ */ +static inline bool of_machine_is_compatible(const char *compat) +{ + const char *compats[] = { compat, NULL }; + + return of_machine_compatible_match(compats); +} extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); @@ -541,6 +550,12 @@ static inline struct device_node *of_get_next_available_child( return NULL; } +static inline struct device_node *of_get_next_reserved_child( + const struct device_node *node, struct device_node *prev) +{ + return NULL; +} + static inline struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name) { @@ -549,11 +564,6 @@ static inline struct device_node *of_find_node_with_property( #define of_fwnode_handle(node) NULL -static inline bool of_have_populated_dt(void) -{ - return false; -} - static inline struct device_node *of_get_compatible_child(const struct device_node *parent, const char *compatible) { @@ -808,6 +818,11 @@ static inline int of_remove_property(struct device_node *np, struct property *pr return 0; } +static inline bool of_machine_compatible_match(const char *const *compats) +{ + return false; +} + static inline bool of_console_check(const struct device_node *dn, const char *name, int index) { return false; @@ -892,6 +907,9 @@ static inline int of_prop_val_eq(struct property *p1, struct property *p2) !memcmp(p1->value, p2->value, (size_t)p1->length); } +#define for_each_property_of_node(dn, pp) \ + for (pp = dn->properties; pp != NULL; pp = pp->next) + #if defined(CONFIG_OF) && defined(CONFIG_NUMA) extern int of_node_to_nid(struct device_node *np); #else @@ -1066,6 +1084,22 @@ static inline int of_parse_phandle_with_optional_args(const struct device_node * } /** + * of_phandle_args_equal() - Compare two of_phandle_args + * @a1: First of_phandle_args to compare + * @a2: Second of_phandle_args to compare + * + * Return: True if a1 and a2 are the same (same node pointer, same phandle + * args), false otherwise. + */ +static inline bool of_phandle_args_equal(const struct of_phandle_args *a1, + const struct of_phandle_args *a2) +{ + return a1->np == a2->np && + a1->args_count == a2->args_count && + !memcmp(a1->args, a2->args, sizeof(a1->args[0]) * a1->args_count); +} + +/** * of_property_count_u8_elems - Count the number of u8 elements in a property * * @np: device node from which the property value is to be read. 
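The of.h hunk above replaces the out-of-line of_machine_is_compatible() with a NULL-terminated multi-match helper, of_machine_compatible_match(). A hedged sketch of a caller that checks several boards in one pass, with made-up compatible strings:

	/* hypothetical board list; the trailing NULL terminates the array */
	static const char *const my_boards[] = {
		"vendor,board-a",
		"vendor,board-b",
		NULL
	};

	static bool my_board_supported(void)
	{
		/* true if the root node's compatible property lists any entry */
		return of_machine_compatible_match(my_boards);
	}

of_machine_is_compatible(compat) is now simply the one-element special case of this call, as the new static inline above makes explicit.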
@@ -1428,9 +1462,25 @@ static inline int of_property_read_s32(const struct device_node *np, #define for_each_child_of_node(parent, child) \ for (child = of_get_next_child(parent, NULL); child != NULL; \ child = of_get_next_child(parent, child)) + +#define for_each_child_of_node_scoped(parent, child) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_child(parent, NULL); \ + child != NULL; \ + child = of_get_next_child(parent, child)) + #define for_each_available_child_of_node(parent, child) \ for (child = of_get_next_available_child(parent, NULL); child != NULL; \ child = of_get_next_available_child(parent, child)) +#define for_each_reserved_child_of_node(parent, child) \ + for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \ + child = of_get_next_reserved_child(parent, child)) + +#define for_each_available_child_of_node_scoped(parent, child) \ + for (struct device_node *child __free(device_node) = \ + of_get_next_available_child(parent, NULL); \ + child != NULL; \ + child = of_get_next_available_child(parent, child)) #define for_each_of_cpu_node(cpu) \ for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \ @@ -1634,6 +1684,21 @@ static inline bool of_device_is_system_power_controller(const struct device_node return of_property_read_bool(np, "system-power-controller"); } +/** + * of_have_populated_dt() - Has DT been populated by bootloader + * + * Return: True if a DTB has been populated by the bootloader and it isn't the + * empty builtin one. False otherwise. + */ +static inline bool of_have_populated_dt(void) +{ +#ifdef CONFIG_OF + return of_property_present(of_root, "compatible"); +#else + return false; +#endif +} + /* * Overlay support */ diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 4d7756087b6b..a4bea62bfa29 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -41,7 +41,7 @@ struct of_endpoint { bool of_graph_is_present(const struct device_node *node); int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint); -int of_graph_get_endpoint_count(const struct device_node *np); +unsigned int of_graph_get_endpoint_count(const struct device_node *np); struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); @@ -68,7 +68,7 @@ static inline int of_graph_parse_endpoint(const struct device_node *node, return -ENOSYS; } -static inline int of_graph_get_endpoint_count(const struct device_node *np) +static inline unsigned int of_graph_get_endpoint_count(const struct device_node *np) { return 0; } diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index 4de2a24cadc9..e338282da652 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -11,7 +11,6 @@ struct reserved_mem_ops; struct reserved_mem { const char *name; unsigned long fdt_node; - unsigned long phandle; const struct reserved_mem_ops *ops; phys_addr_t base; phys_addr_t size; diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 3921fbed0b28..6f9242259edc 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -17,10 +17,12 @@ * build_OID_registry.pl to generate the data for look_up_OID(). 
*/ enum OID { + OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ + OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ @@ -28,6 +30,7 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ + OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */ @@ -64,7 +67,9 @@ enum OID { OID_PKU2U, /* 1.3.5.1.5.2.7 */ OID_Scram, /* 1.3.6.1.5.5.14 */ OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ + OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ + OID_id_ansip521r1, /* 1.3.132.0.35 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ OID_sha512, /* 2.16.840.1.101.3.4.2.3 */ diff --git a/include/linux/omap-mailbox.h b/include/linux/omap-mailbox.h index 8aa984ec1f38..3cc5c4ed7f5a 100644 --- a/include/linux/omap-mailbox.h +++ b/include/linux/omap-mailbox.h @@ -10,17 +10,4 @@ typedef uintptr_t mbox_msg_t; #define omap_mbox_message(data) (u32)(mbox_msg_t)(data) -typedef int __bitwise omap_mbox_irq_t; -#define IRQ_TX ((__force omap_mbox_irq_t) 1) -#define IRQ_RX ((__force omap_mbox_irq_t) 2) - -struct mbox_chan; -struct mbox_client; - -struct mbox_chan *omap_mbox_request_channel(struct mbox_client *cl, - const char *chan_name); - -void omap_mbox_enable_irq(struct mbox_chan *chan, omap_mbox_irq_t irq); -void omap_mbox_disable_irq(struct mbox_chan *chan, omap_mbox_irq_t irq); - #endif /* OMAP_MAILBOX_H */ diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 7b5cf4a5cd19..0c7e3dcfe867 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -31,8 +31,10 @@ * credit to Christian Biere. */ #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) -#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) -#define type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define __type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) +#define type_max(t) __type_max(typeof(t)) +#define __type_min(T) ((T)((T)-type_max(T)-(T)1)) +#define type_min(t) __type_min(typeof(t)) /* * Avoids triggering -Wtype-limits compilation warning, @@ -57,46 +59,123 @@ static inline bool __must_check __must_check_overflow(bool overflow) * @b: second addend * @d: pointer to store sum * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted addition, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * sum has overflowed or been truncated. + * *@d holds the results of the attempted addition, regardless of whether + * wrap-around occurred. */ #define check_add_overflow(a, b, d) \ __must_check_overflow(__builtin_add_overflow(a, b, d)) /** + * wrapping_add() - Intentionally perform a wrapping addition + * @type: type for result of calculation + * @a: first addend + * @b: second addend + * + * Return the potentially wrapped-around addition without + * tripping any wrap-around sanitizers that may be enabled. 
+ */ +#define wrapping_add(type, a, b) \ + ({ \ + type __val; \ + __builtin_add_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_add() - Intentionally perform a wrapping increment assignment + * @var: variable to be incremented + * @offset: amount to add + * + * Increments @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_add(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_add(typeof(var), *__ptr, offset); \ + }) + +/** * check_sub_overflow() - Calculate subtraction with overflow checking * @a: minuend; value to subtract from * @b: subtrahend; value to subtract from @a * @d: pointer to store difference * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted subtraction, but is not considered - * "safe for use" on a non-zero return value, which indicates that the - * difference has underflowed or been truncated. + * *@d holds the results of the attempted subtraction, regardless of whether + * wrap-around occurred. */ #define check_sub_overflow(a, b, d) \ __must_check_overflow(__builtin_sub_overflow(a, b, d)) /** + * wrapping_sub() - Intentionally perform a wrapping subtraction + * @type: type for result of calculation + * @a: minuend; value to subtract from + * @b: subtrahend; value to subtract from @a + * + * Return the potentially wrapped-around subtraction without + * tripping any wrap-around sanitizers that may be enabled. + */ +#define wrapping_sub(type, a, b) \ + ({ \ + type __val; \ + __builtin_sub_overflow(a, b, &__val); \ + __val; \ + }) + +/** + * wrapping_assign_sub() - Intentionally perform a wrapping decrement assign + * @var: variable to be decremented + * @offset: amount to subtract + * + * Decrements @var by @offset with wrap-around. Returns the resulting + * value of @var. Will not trip any wrap-around sanitizers. + * + * Returns the new value of @var. + */ +#define wrapping_assign_sub(var, offset) \ + ({ \ + typeof(var) *__ptr = &(var); \ + *__ptr = wrapping_sub(typeof(var), *__ptr, offset); \ + }) + +/** * check_mul_overflow() - Calculate multiplication with overflow checking * @a: first factor * @b: second factor * @d: pointer to store product * - * Returns 0 on success. + * Returns true on wrap-around, false otherwise. * - * *@d holds the results of the attempted multiplication, but is not - * considered "safe for use" on a non-zero return value, which indicates - * that the product has overflowed or been truncated. + * *@d holds the results of the attempted multiplication, regardless of whether + * wrap-around occurred. */ #define check_mul_overflow(a, b, d) \ __must_check_overflow(__builtin_mul_overflow(a, b, d)) /** + * wrapping_mul() - Intentionally perform a wrapping multiplication + * @type: type for result of calculation + * @a: first factor + * @b: second factor + * + * Return the potentially wrapped-around multiplication without + * tripping any wrap-around sanitizers that may be enabled. 
+ */ +#define wrapping_mul(type, a, b) \ + ({ \ + type __val; \ + __builtin_mul_overflow(a, b, &__val); \ + __val; \ + }) + +/** * check_shl_overflow() - Calculate a left-shifted value and check overflow * @a: Value to be shifted * @s: How many bits left to shift @@ -120,7 +199,7 @@ static inline bool __must_check __must_check_overflow(bool overflow) typeof(a) _a = a; \ typeof(s) _s = s; \ typeof(d) _d = d; \ - u64 _a_full = _a; \ + unsigned long long _a_full = _a; \ unsigned int _to_shift = \ is_non_negative(_s) && _s < 8 * sizeof(*d) ? _s : 0; \ *_d = (_a_full << _to_shift); \ @@ -130,10 +209,10 @@ static inline bool __must_check __must_check_overflow(bool overflow) #define __overflows_type_constexpr(x, T) ( \ is_unsigned_type(typeof(x)) ? \ - (x) > type_max(typeof(T)) : \ + (x) > type_max(T) : \ is_unsigned_type(typeof(T)) ? \ - (x) < 0 || (x) > type_max(typeof(T)) : \ - (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + (x) < 0 || (x) > type_max(T) : \ + (x) < type_min(T) || (x) > type_max(T)) #define __overflows_type(x, T) ({ \ typeof(T) v = 0; \ @@ -319,7 +398,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * @count: Number of elements in the array; must be compile-time const. * @initializer: initializer expression (could be empty for no init). */ -#define _DEFINE_FLEX(type, name, member, count, initializer) \ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ @@ -329,8 +408,8 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) type *name = (type *)&name##_u /** - * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing - * flexible array member. + * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member, when it does not have a __counted_by annotation. * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. @@ -341,7 +420,24 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. */ -#define DEFINE_FLEX(type, name, member, count) \ +#define DEFINE_RAW_FLEX(type, name, member, count) \ _DEFINE_FLEX(type, name, member, count, = {}) +/** + * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing + * flexible array member. + * + * @TYPE: structure type name, including "struct" keyword. + * @NAME: Name for a variable to define. + * @MEMBER: Name of the array member. + * @COUNTER: Name of the __counted_by member. + * @COUNT: Number of elements in the array; must be compile-time const. + * + * Define a zeroed, on-stack, instance of @TYPE structure with a trailing + * flexible array member. + * Use __struct_size(@NAME) to get compile-time size of it afterwards. + */ +#define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/padata.h b/include/linux/padata.h index 495b16b6b4d7..0146daf34430 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -137,6 +137,7 @@ struct padata_shell { * appropriate for one worker thread to do at once. * @max_threads: Max threads to use for the job, actual number may be less * depending on task size and minimum chunk size. 
+ * @numa_aware: Distribute jobs to different nodes with CPU in a round-robin
+ *              fashion.
 */
 struct padata_mt_job {
	void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
@@ -146,6 +147,7 @@ struct padata_mt_job {
	unsigned long		align;
	unsigned long		min_chunk;
	int			max_threads;
+	bool			numa_aware;
 };
 
 /**
@@ -178,10 +180,6 @@ struct padata_instance {
 
 #ifdef CONFIG_PADATA
 extern void __init padata_init(void);
-#else
-static inline void __init padata_init(void) {}
-#endif
-
 extern struct padata_instance *padata_alloc(const char *name);
 extern void padata_free(struct padata_instance *pinst);
 extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
@@ -192,4 +190,12 @@ extern void padata_do_serial(struct padata_priv *padata);
 extern void __init padata_do_multithreaded(struct padata_mt_job *job);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
			      cpumask_var_t cpumask);
+#else
+static inline void __init padata_init(void) {}
+static inline void __init padata_do_multithreaded(struct padata_mt_job *job)
+{
+	job->thread_fn(job->start, job->start + job->size, job->fn_arg);
+}
+#endif
+
 #endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 735cddc13d20..104078afe0b1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,7 +109,6 @@ enum pageflags {
	PG_active,
	PG_workingset,
	PG_error,
-	PG_slab,
	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
	PG_arch_1,
	PG_reserved,
@@ -190,7 +189,6 @@ enum pageflags {
	/* At least one page in this folio has the hwpoison flag set */
	PG_has_hwpoisoned = PG_error,
-	PG_hugetlb = PG_active,
	PG_large_rmappable = PG_workingset, /* anon or file-backed */
 };
 
@@ -237,7 +235,7 @@ static inline const struct page *page_fixed_fake_head(const struct page *page)
	}
 #endif
 
-static __always_inline int page_is_fake_head(struct page *page)
+static __always_inline int page_is_fake_head(const struct page *page)
 {
	return page_fixed_fake_head(page) != page;
 }
@@ -281,12 +279,12 @@ static inline unsigned long _compound_head(const struct page *page)
 */
 #define folio_page(folio, n)	nth_page(&(folio)->page, n)
 
-static __always_inline int PageTail(struct page *page)
+static __always_inline int PageTail(const struct page *page)
 {
	return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
 }
 
-static __always_inline int PageCompound(struct page *page)
+static __always_inline int PageCompound(const struct page *page)
 {
	return test_bit(PG_head, &page->flags) ||
	       READ_ONCE(page->compound_head) & 1;
@@ -306,6 +304,16 @@ static inline void page_init_poison(struct page *page, size_t size)
 }
 #endif
 
+static const unsigned long *const_folio_flags(const struct folio *folio,
+		unsigned n)
+{
+	const struct page *page = &folio->page;
+
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
+	VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+	return &page[n].flags;
+}
+
 static unsigned long *folio_flags(struct folio *folio, unsigned n)
 {
	struct page *page = &folio->page;
@@ -328,9 +336,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
 * for compound page all operations related to the page flag applied to
 * head page.
 *
- * PF_ONLY_HEAD:
- *     for compound page, callers only ever operate on the head page.
- *
 * PF_NO_TAIL:
 *     modifications of the page flag must be done on small or head pages,
 *     checks can be done on tail pages too.
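Worth pausing on the padata change above: with CONFIG_PADATA=n, padata_do_multithreaded() now degrades to a direct single-threaded call instead of being unavailable. A minimal caller sketch — field values and the helper my_thread_fn() are invented for illustration:

	static void my_thread_fn(unsigned long start, unsigned long end, void *arg)
	{
		/* process items in [start, end) */
	}

	struct padata_mt_job job = {
		.thread_fn   = my_thread_fn,
		.fn_arg      = ctx,
		.start       = 0,
		.size        = nr_items,
		.align       = 1,
		.min_chunk   = 64,
		.max_threads = 8,
		.numa_aware  = true,	/* new field: spread workers across nodes */
	};
	padata_do_multithreaded(&job);

With padata compiled out, the new stub simply runs my_thread_fn(0, nr_items, ctx) inline, so callers behave identically either way.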
@@ -346,9 +351,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
		page; })
 #define PF_ANY(page, enforce)	PF_POISONED_CHECK(page)
 #define PF_HEAD(page, enforce)	PF_POISONED_CHECK(compound_head(page))
-#define PF_ONLY_HEAD(page, enforce) ({					\
-		VM_BUG_ON_PGFLAGS(PageTail(page), page);		\
-		PF_POISONED_CHECK(page); })
 #define PF_NO_TAIL(page, enforce) ({					\
		VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);	\
		PF_POISONED_CHECK(compound_head(page)); })
@@ -362,59 +364,81 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
 /* Which page is the flag stored in */
 #define FOLIO_PF_ANY		0
 #define FOLIO_PF_HEAD		0
-#define FOLIO_PF_ONLY_HEAD	0
 #define FOLIO_PF_NO_TAIL	0
 #define FOLIO_PF_NO_COMPOUND	0
 #define FOLIO_PF_SECOND		1
 
+#define FOLIO_HEAD_PAGE		0
+#define FOLIO_SECOND_PAGE	1
+
 /*
 * Macros to create function definitions for page flags
 */
+#define FOLIO_TEST_FLAG(name, page)					\
+static __always_inline bool folio_test_##name(const struct folio *folio) \
+{ return test_bit(PG_##name, const_folio_flags(folio, page)); }
+
+#define FOLIO_SET_FLAG(name, page)					\
+static __always_inline void folio_set_##name(struct folio *folio)	\
+{ set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_CLEAR_FLAG(name, page)					\
+static __always_inline void folio_clear_##name(struct folio *folio)	\
+{ clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_SET_FLAG(name, page)					\
+static __always_inline void __folio_set_##name(struct folio *folio)	\
+{ __set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_CLEAR_FLAG(name, page)					\
+static __always_inline void __folio_clear_##name(struct folio *folio)	\
+{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_SET_FLAG(name, page)					\
+static __always_inline bool folio_test_set_##name(struct folio *folio)	\
+{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_CLEAR_FLAG(name, page)				\
+static __always_inline bool folio_test_clear_##name(struct folio *folio) \
+{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_FLAG(name, page)						\
+FOLIO_TEST_FLAG(name, page)						\
+FOLIO_SET_FLAG(name, page)						\
+FOLIO_CLEAR_FLAG(name, page)
+
 #define TESTPAGEFLAG(uname, lname, policy)				\
-static __always_inline bool folio_test_##lname(struct folio *folio)	\
-{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }	\
-static __always_inline int Page##uname(struct page *page)		\
+FOLIO_TEST_FLAG(lname, FOLIO_##policy)					\
+static __always_inline int Page##uname(const struct page *page)	\
 { return test_bit(PG_##lname, &policy(page, 0)->flags); }
 
 #define SETPAGEFLAG(uname, lname, policy)				\
-static __always_inline							\
-void folio_set_##lname(struct folio *folio)				\
-{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
+FOLIO_SET_FLAG(lname, FOLIO_##policy)					\
 static __always_inline void SetPage##uname(struct page *page)		\
 { set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define CLEARPAGEFLAG(uname, lname, policy)				\
-static __always_inline							\
-void folio_clear_##lname(struct folio *folio)				\
-{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
+FOLIO_CLEAR_FLAG(lname, FOLIO_##policy)					\
 static __always_inline void ClearPage##uname(struct page *page)	\
 { clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define __SETPAGEFLAG(uname, lname, policy)				\
-static __always_inline							\
-void __folio_set_##lname(struct folio *folio)				\
-{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
+__FOLIO_SET_FLAG(lname, FOLIO_##policy)					\
 static __always_inline void __SetPage##uname(struct page *page)	\
 { __set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define __CLEARPAGEFLAG(uname, lname, policy)				\
-static __always_inline							\
-void __folio_clear_##lname(struct folio *folio)				\
-{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }	\
+__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy)				\
 static __always_inline void __ClearPage##uname(struct page *page)	\
 { __clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define TESTSETFLAG(uname, lname, policy)				\
-static __always_inline							\
-bool folio_test_set_##lname(struct folio *folio)			\
-{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy)				\
 static __always_inline int TestSetPage##uname(struct page *page)	\
 { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define TESTCLEARFLAG(uname, lname, policy)				\
-static __always_inline							\
-bool folio_test_clear_##lname(struct folio *folio)			\
-{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy)				\
 static __always_inline int TestClearPage##uname(struct page *page)	\
 { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
@@ -432,30 +456,51 @@ static __always_inline int TestClearPage##uname(struct page *page)	\
	TESTSETFLAG(uname, lname, policy)				\
	TESTCLEARFLAG(uname, lname, policy)
 
+#define FOLIO_TEST_FLAG_FALSE(name)					\
+static inline bool folio_test_##name(const struct folio *folio)	\
+{ return false; }
+#define FOLIO_SET_FLAG_NOOP(name)					\
+static inline void folio_set_##name(struct folio *folio) { }
+#define FOLIO_CLEAR_FLAG_NOOP(name)					\
+static inline void folio_clear_##name(struct folio *folio) { }
+#define __FOLIO_SET_FLAG_NOOP(name)					\
+static inline void __folio_set_##name(struct folio *folio) { }
+#define __FOLIO_CLEAR_FLAG_NOOP(name)					\
+static inline void __folio_clear_##name(struct folio *folio) { }
+#define FOLIO_TEST_SET_FLAG_FALSE(name)					\
+static inline bool folio_test_set_##name(struct folio *folio)		\
+{ return false; }
+#define FOLIO_TEST_CLEAR_FLAG_FALSE(name)				\
+static inline bool folio_test_clear_##name(struct folio *folio)	\
+{ return false; }
+
+#define FOLIO_FLAG_FALSE(name)						\
+FOLIO_TEST_FLAG_FALSE(name)						\
+FOLIO_SET_FLAG_NOOP(name)						\
+FOLIO_CLEAR_FLAG_NOOP(name)
+
 #define TESTPAGEFLAG_FALSE(uname, lname)				\
-static inline bool folio_test_##lname(const struct folio *folio) { return false; } \
+FOLIO_TEST_FLAG_FALSE(lname)						\
 static inline int Page##uname(const struct page *page) { return 0; }
 
 #define SETPAGEFLAG_NOOP(uname, lname)					\
-static inline void folio_set_##lname(struct folio *folio) { }		\
+FOLIO_SET_FLAG_NOOP(lname)						\
 static inline void SetPage##uname(struct page *page) { }
 
 #define CLEARPAGEFLAG_NOOP(uname, lname)				\
-static inline void folio_clear_##lname(struct folio *folio) { }	\
+FOLIO_CLEAR_FLAG_NOOP(lname)						\
 static inline void ClearPage##uname(struct page *page) { }
 
 #define __CLEARPAGEFLAG_NOOP(uname, lname)				\
-static inline void __folio_clear_##lname(struct folio *folio) { }	\
+__FOLIO_CLEAR_FLAG_NOOP(lname)						\
 static inline void __ClearPage##uname(struct page *page) { }
 
 #define TESTSETFLAG_FALSE(uname, lname)					\
-static inline bool folio_test_set_##lname(struct folio *folio)		\
-{ return 0; }								\
+FOLIO_TEST_SET_FLAG_FALSE(lname)					\
 static inline int TestSetPage##uname(struct page *page) { return 0; }
 
 #define TESTCLEARFLAG_FALSE(uname, lname)				\
-static inline bool folio_test_clear_##lname(struct folio *folio)	\
-{ return 0; }								\
+FOLIO_TEST_CLEAR_FLAG_FALSE(lname)					\
 static inline int TestClearPage##uname(struct page *page) { return 0; }
 
 #define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname)	\
@@ -465,11 +510,11 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
	TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
-PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
+FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE)
 PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
-PAGEFLAG(Referenced, referenced, PF_HEAD)
-	TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
-	__SETPAGEFLAG(Referenced, referenced, PF_HEAD)
+FOLIO_FLAG(referenced, FOLIO_HEAD_PAGE)
+	FOLIO_TEST_CLEAR_FLAG(referenced, FOLIO_HEAD_PAGE)
+	__FOLIO_SET_FLAG(referenced, FOLIO_HEAD_PAGE)
 PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
	__CLEARPAGEFLAG(Dirty, dirty, PF_HEAD)
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
@@ -478,7 +523,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
	TESTCLEARFLAG(Active, active, PF_HEAD)
 PAGEFLAG(Workingset, workingset, PF_HEAD)
	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
-__PAGEFLAG(Slab, slab, PF_NO_TAIL)
 PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	   /* Used by some filesystems */
 
 /* Xen */
@@ -532,13 +576,13 @@ PAGEFLAG_FALSE(HighMem, highmem)
 #endif
 
 #ifdef CONFIG_SWAP
-static __always_inline bool folio_test_swapcache(struct folio *folio)
+static __always_inline bool folio_test_swapcache(const struct folio *folio)
 {
	return folio_test_swapbacked(folio) &&
-		test_bit(PG_swapcache, folio_flags(folio, 0));
+		test_bit(PG_swapcache, const_folio_flags(folio, 0));
 }
 
-static __always_inline bool PageSwapCache(struct page *page)
+static __always_inline bool PageSwapCache(const struct page *page)
 {
	return folio_test_swapcache(page_folio(page));
 }
@@ -582,11 +626,18 @@ PAGEFLAG_FALSE(HWPoison, hwpoison)
 #define __PG_HWPOISON 0
 #endif
 
-#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
-TESTPAGEFLAG(Young, young, PF_ANY)
-SETPAGEFLAG(Young, young, PF_ANY)
-TESTCLEARFLAG(Young, young, PF_ANY)
-PAGEFLAG(Idle, idle, PF_ANY)
+#ifdef CONFIG_PAGE_IDLE_FLAG
+#ifdef CONFIG_64BIT
+FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(idle, FOLIO_HEAD_PAGE)
+#endif
+/* See page_idle.h for !64BIT workaround */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
+FOLIO_FLAG_FALSE(young)
+FOLIO_TEST_CLEAR_FLAG_FALSE(young)
+FOLIO_FLAG_FALSE(idle)
 #endif
 
 /*
@@ -637,22 +688,22 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
 */
 #define PAGE_MAPPING_DAX_SHARED	((void *)0x1)
 
-static __always_inline bool folio_mapping_flags(struct folio *folio)
+static __always_inline bool folio_mapping_flags(const struct folio *folio)
 {
	return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
 }
 
-static __always_inline int PageMappingFlags(struct page *page)
+static __always_inline bool PageMappingFlags(const struct page *page)
 {
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
 }
 
-static __always_inline bool folio_test_anon(struct folio *folio)
+static __always_inline bool folio_test_anon(const struct folio *folio)
 {
	return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
 }
 
-static __always_inline bool PageAnon(struct page *page)
+static __always_inline bool PageAnon(const struct page *page)
 {
	return folio_test_anon(page_folio(page));
 }
@@ -663,7 +714,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio)
			PAGE_MAPPING_MOVABLE;
 }
 
-static __always_inline int __PageMovable(struct page *page)
+static __always_inline bool __PageMovable(const struct page *page)
 {
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
			PAGE_MAPPING_MOVABLE;
 }
@@ -676,13 +727,13 @@ static __always_inline int __PageMovable(struct page *page)
 * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any
 * anon_vma, but to that page's node of the stable tree.
 */
-static __always_inline bool folio_test_ksm(struct folio *folio)
+static __always_inline bool folio_test_ksm(const struct folio *folio)
 {
	return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
			PAGE_MAPPING_KSM;
 }
 
-static __always_inline bool PageKsm(struct page *page)
+static __always_inline bool PageKsm(const struct page *page)
 {
	return folio_test_ksm(page_folio(page));
 }
@@ -690,7 +741,7 @@ static __always_inline bool PageKsm(struct page *page)
 TESTPAGEFLAG_FALSE(Ksm, ksm)
 #endif
 
-u64 stable_page_flags(struct page *page);
+u64 stable_page_flags(const struct page *page);
 
 /**
 * folio_xor_flags_has_waiters - Change some folio flags.
@@ -721,9 +772,9 @@ static inline bool folio_xor_flags_has_waiters(struct folio *folio,
 * some of the bytes in it may be; see the is_partially_uptodate()
 * address_space operation.
 */
-static inline bool folio_test_uptodate(struct folio *folio)
+static inline bool folio_test_uptodate(const struct folio *folio)
 {
-	bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
+	bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0));
	/*
	 * Must ensure that the data we read out of the folio is loaded
	 * _after_ we've loaded folio->flags to check the uptodate bit.
@@ -738,7 +789,7 @@ static inline bool folio_test_uptodate(const struct folio *folio)
	return ret;
 }
 
-static inline int PageUptodate(struct page *page)
+static inline bool PageUptodate(const struct page *page)
 {
	return folio_test_uptodate(page_folio(page));
 }
@@ -780,12 +831,12 @@ void set_page_writeback(struct page *page);
 #define folio_start_writeback_keepwrite(folio)	\
	__folio_start_writeback(folio, true)
 
-static __always_inline bool folio_test_head(struct folio *folio)
+static __always_inline bool folio_test_head(const struct folio *folio)
 {
-	return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY));
+	return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY));
 }
 
-static __always_inline int PageHead(struct page *page)
+static __always_inline int PageHead(const struct page *page)
 {
	PF_POISONED_CHECK(page);
	return test_bit(PG_head, &page->flags) && !page_is_fake_head(page);
@@ -801,7 +852,7 @@ CLEARPAGEFLAG(Head, head, PF_ANY)
 *
 * Return: True if the folio is larger than one page.
 */
-static inline bool folio_test_large(struct folio *folio)
+static inline bool folio_test_large(const struct folio *folio)
 {
	return folio_test_head(folio);
 }
@@ -822,36 +873,13 @@ static inline void ClearPageCompound(struct page *page)
	BUG_ON(!PageHead(page));
	ClearPageHead(page);
 }
-PAGEFLAG(LargeRmappable, large_rmappable, PF_SECOND)
+FOLIO_FLAG(large_rmappable, FOLIO_SECOND_PAGE)
 #else
-TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)
+FOLIO_FLAG_FALSE(large_rmappable)
 #endif
 
 #define PG_head_mask ((1UL << PG_head))
 
-#ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(struct page *page);
-SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
-
-/**
- * folio_test_hugetlb - Determine if the folio belongs to hugetlbfs
- * @folio: The folio to test.
- *
- * Context: Any context. Caller should have a reference on the folio to
- *	prevent it from being turned into a tail page.
- * Return: True for hugetlbfs folios, false for anon folios or folios
- * belonging to other filesystems.
- */
-static inline bool folio_test_hugetlb(struct folio *folio)
-{
-	return folio_test_large(folio) &&
-		test_bit(PG_hugetlb, folio_flags(folio, 1));
-}
-#else
-TESTPAGEFLAG_FALSE(Huge, hugetlb)
-#endif
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
 * PageHuge() only returns true for hugetlbfs pages, but not for
@@ -861,7 +889,7 @@ TESTPAGEFLAG_FALSE(Huge, hugetlb)
 * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
 * called only in the core VM paths where hugetlbfs pages can't exist.
 */
-static inline int PageTransHuge(struct page *page)
+static inline int PageTransHuge(const struct page *page)
 {
	VM_BUG_ON_PAGE(PageTail(page), page);
	return PageHead(page);
 }
@@ -872,7 +900,7 @@ static inline int PageTransHuge(struct page *page)
 * and hugetlbfs pages, so it should only be called when it's known
 * that hugetlbfs pages aren't involved.
 */
-static inline int PageTransCompound(struct page *page)
+static inline int PageTransCompound(const struct page *page)
 {
	return PageCompound(page);
 }
@@ -882,7 +910,7 @@ static inline int PageTransCompound(struct page *page)
 * and hugetlbfs pages, so it should only be called when it's known
 * that hugetlbfs pages aren't involved.
 */
-static inline int PageTransTail(struct page *page)
+static inline int PageTransTail(const struct page *page)
 {
	return PageTail(page);
 }
@@ -908,33 +936,23 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 #endif
 
 /*
- * Check if a page is currently marked HWPoisoned. Note that this check is
- * best effort only and inherently racy: there is no way to synchronize with
- * failing hardware.
- */
-static inline bool is_page_hwpoison(struct page *page)
-{
-	if (PageHWPoison(page))
-		return true;
-	return PageHuge(page) && PageHWPoison(compound_head(page));
-}
-
-/*
- * For pages that are never mapped to userspace (and aren't PageSlab),
+ * For pages that are never mapped to userspace,
 * page_type may be used. Because it is initialised to -1, we invert the
 * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
 * __ClearPageFoo *sets* the bit used for PageFoo. We reserve a few high and
- * low bits so that an underflow or overflow of page_mapcount() won't be
+ * low bits so that an underflow or overflow of _mapcount won't be
 * mistaken for a page type value.
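+ *
+ * A worked example (illustrative, not part of the patch): page_type
+ * starts out as -1 (0xffffffff); __SetPageBuddy() clears PG_buddy,
+ * leaving 0xffffff7f, and PageType(page, PG_buddy) then holds because
+ * the PAGE_TYPE_BASE bits are still intact.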
 */
 #define PAGE_TYPE_BASE	0xf0000000
-/* Reserve	0x0000007f to catch underflows of page_mapcount */
+/* Reserve	0x0000007f to catch underflows of _mapcount */
 #define PAGE_MAPCOUNT_RESERVE	-128
 #define PG_buddy	0x00000080
 #define PG_offline	0x00000100
 #define PG_table	0x00000200
 #define PG_guard	0x00000400
+#define PG_hugetlb	0x00000800
+#define PG_slab		0x00001000
 
 #define PageType(page, flag)						\
	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
@@ -946,40 +964,43 @@ static inline int page_type_has_type(unsigned int page_type)
	return (int)page_type < PAGE_MAPCOUNT_RESERVE;
 }
 
-static inline int page_has_type(struct page *page)
+static inline int page_has_type(const struct page *page)
 {
	return page_type_has_type(page->page_type);
 }
 
+#define FOLIO_TYPE_OPS(lname, fname)					\
+static __always_inline bool folio_test_##fname(const struct folio *folio)\
+{									\
+	return folio_test_type(folio, PG_##lname);			\
+}									\
+static __always_inline void __folio_set_##fname(struct folio *folio)	\
+{									\
+	VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio);		\
+	folio->page.page_type &= ~PG_##lname;				\
+}									\
+static __always_inline void __folio_clear_##fname(struct folio *folio)	\
+{									\
+	VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio);		\
+	folio->page.page_type |= PG_##lname;				\
+}
+
 #define PAGE_TYPE_OPS(uname, lname, fname)				\
+FOLIO_TYPE_OPS(lname, fname)						\
 static __always_inline int Page##uname(const struct page *page)	\
 {									\
	return PageType(page, PG_##lname);				\
 }									\
-static __always_inline int folio_test_##fname(const struct folio *folio)\
-{									\
-	return folio_test_type(folio, PG_##lname);			\
-}									\
 static __always_inline void __SetPage##uname(struct page *page)	\
 {									\
	VM_BUG_ON_PAGE(!PageType(page, 0), page);			\
	page->page_type &= ~PG_##lname;					\
 }									\
-static __always_inline void __folio_set_##fname(struct folio *folio)	\
-{									\
-	VM_BUG_ON_FOLIO(!folio_test_type(folio, 0), folio);		\
-	folio->page.page_type &= ~PG_##lname;				\
-}									\
 static __always_inline void __ClearPage##uname(struct page *page)	\
 {									\
	VM_BUG_ON_PAGE(!Page##uname(page), page);			\
	page->page_type |= PG_##lname;					\
-}									\
-static __always_inline void __folio_clear_##fname(struct folio *folio)	\
-{									\
-	VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio);		\
-	folio->page.page_type |= PG_##lname;				\
-}									\
+}
 
 /*
 * PageBuddy() indicates that the page is free and in the buddy system
@@ -1026,14 +1047,67 @@ PAGE_TYPE_OPS(Table, table, pgtable)
 */
 PAGE_TYPE_OPS(Guard, guard, guard)
 
-extern bool is_free_buddy_page(struct page *page);
+FOLIO_TYPE_OPS(slab, slab)
+
+/**
+ * PageSlab - Determine if the page belongs to the slab allocator
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for slab pages, false for any other kind of page.
+ */
+static inline bool PageSlab(const struct page *page)
+{
+	return folio_test_slab(page_folio(page));
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+FOLIO_TYPE_OPS(hugetlb, hugetlb)
+#else
+FOLIO_TEST_FLAG_FALSE(hugetlb)
+#endif
+
+/**
+ * PageHuge - Determine if the page belongs to hugetlbfs
+ * @page: The page to test.
+ *
+ * Context: Any context.
+ * Return: True for hugetlbfs pages, false for anon pages or pages
+ * belonging to other filesystems.
+ */
+static inline bool PageHuge(const struct page *page)
+{
+	return folio_test_hugetlb(page_folio(page));
+}
+
+/*
+ * Check if a page is currently marked HWPoisoned. Note that this check is
+ * best effort only and inherently racy: there is no way to synchronize with
+ * failing hardware.
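+ *
+ * Callers therefore treat the result as a hint only, e.g. (illustrative):
+ *
+ *	if (is_page_hwpoison(page))
+ *		continue;	/* skip known-poisoned pages */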
+ */
+static inline bool is_page_hwpoison(const struct page *page)
+{
+	const struct folio *folio;
+
+	if (PageHWPoison(page))
+		return true;
+	folio = page_folio(page);
+	return folio_test_hugetlb(folio) && PageHWPoison(&folio->page);
+}
+
+bool is_free_buddy_page(const struct page *page);
 
 PAGEFLAG(Isolated, isolated, PF_ANY);
 
-static __always_inline int PageAnonExclusive(struct page *page)
+static __always_inline int PageAnonExclusive(const struct page *page)
 {
	VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
-	VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
+	/*
+	 * HugeTLB stores this information on the head page; THP keeps it per
+	 * page
+	 */
+	if (PageHuge(page))
+		page = compound_head(page);
	return test_bit(PG_anon_exclusive, &PF_ANY(page, 1)->flags);
 }
 
@@ -1072,7 +1146,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
	(1UL << PG_lru		| 1UL << PG_locked	|	\
	 1UL << PG_private	| 1UL << PG_private_2	|	\
	 1UL << PG_writeback	| 1UL << PG_reserved	|	\
-	 1UL << PG_slab		| 1UL << PG_active	|	\
+	 1UL << PG_active	|				\
	 1UL << PG_unevictable	| __PG_MLOCKED | LRU_GEN_MASK)
 
 /*
@@ -1092,7 +1166,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
 */
 #define PAGE_FLAGS_SECOND						\
	(0xffUL /* order */		| 1UL << PG_has_hwpoisoned |	\
-	 1UL << PG_hugetlb		| 1UL << PG_large_rmappable)
+	 1UL << PG_large_rmappable)
 
 #define PAGE_FLAGS_PRIVATE				\
	(1UL << PG_private | 1UL << PG_private_2)
@@ -1103,19 +1177,18 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
 * Determine if a page has private stuff, indicating that release routines
 * should be invoked upon it.
 */
-static inline int page_has_private(struct page *page)
+static inline int page_has_private(const struct page *page)
 {
	return !!(page->flags & PAGE_FLAGS_PRIVATE);
 }
 
-static inline bool folio_has_private(struct folio *folio)
+static inline bool folio_has_private(const struct folio *folio)
 {
	return page_has_private(&folio->page);
 }
 
 #undef PF_ANY
 #undef PF_HEAD
-#undef PF_ONLY_HEAD
 #undef PF_NO_TAIL
 #undef PF_NO_COMPOUND
 #undef PF_SECOND
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 4ac34392823a..c16db0067090 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -34,8 +34,9 @@ static inline bool is_migrate_isolate(int migratetype)
 #define REPORT_FAILURE	0x2
 
 void set_pageblock_migratetype(struct page *page, int migratetype);
-int move_freepages_block(struct zone *zone, struct page *page,
-			 int migratetype, int *num_movable);
+
+bool move_freepages_block_isolate(struct zone *zone, struct page *page,
+				  int migratetype);
 
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     int migratetype, int flags, gfp_t gfp_flags);
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index c141ea9a95ef..8cd858d912c4 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -4,7 +4,7 @@
 
 #include <linux/atomic.h>
 #include <linux/cache.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
 #include <asm/page.h>
 
 struct page_counter {
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index be98564191e6..e4b48a0dda24 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -4,7 +4,6 @@
 
 #include <linux/types.h>
 #include <linux/stacktrace.h>
-#include <linux/stackdepot.h>
 
 struct pglist_data;
 
@@ -78,7 +77,7 @@ static inline void page_ext_init(void)
 }
 #endif
 
-extern struct page_ext *page_ext_get(struct page *page);
+extern struct page_ext *page_ext_get(const struct page *page);
 extern void page_ext_put(struct page_ext *page_ext);
 
 static inline void *page_ext_data(struct page_ext *page_ext,
@@ -118,7 +117,7 @@ static inline void page_ext_init_flatmem(void)
 {
 }
 
-static inline struct page_ext *page_ext_get(struct page *page)
+static inline struct page_ext *page_ext_get(const struct page *page)
 {
	return NULL;
 }
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index d8f344840643..89ca0d5dc1e7 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -6,14 +6,12 @@
 #include <linux/page-flags.h>
 #include <linux/page_ext.h>
 
-#ifdef CONFIG_PAGE_IDLE_FLAG
-
-#ifndef CONFIG_64BIT
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
 /*
 * If there is not enough space to store Idle and Young bits in page flags, use
 * page ext flags instead.
 */
-static inline bool folio_test_young(struct folio *folio)
+static inline bool folio_test_young(const struct folio *folio)
 {
	struct page_ext *page_ext = page_ext_get(&folio->page);
	bool page_young;
@@ -52,7 +50,7 @@ static inline bool folio_test_clear_young(struct folio *folio)
	return page_young;
 }
 
-static inline bool folio_test_idle(struct folio *folio)
+static inline bool folio_test_idle(const struct folio *folio)
 {
	struct page_ext *page_ext = page_ext_get(&folio->page);
	bool page_idle;
@@ -60,7 +58,7 @@ static inline bool folio_test_idle(const struct folio *folio)
	if (unlikely(!page_ext))
		return false;
 
-	page_idle = test_bit(PAGE_EXT_IDLE,	&page_ext->flags);
+	page_idle = test_bit(PAGE_EXT_IDLE, &page_ext->flags);
	page_ext_put(page_ext);
 
	return page_idle;
@@ -87,61 +85,5 @@ static inline void folio_clear_idle(struct folio *folio)
	clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
	page_ext_put(page_ext);
 }
-#endif /* !CONFIG_64BIT */
-
-#else /* !CONFIG_PAGE_IDLE_FLAG */
-
-static inline bool folio_test_young(struct folio *folio)
-{
-	return false;
-}
-
-static inline void folio_set_young(struct folio *folio)
-{
-}
-
-static inline bool folio_test_clear_young(struct folio *folio)
-{
-	return false;
-}
-
-static inline bool folio_test_idle(struct folio *folio)
-{
-	return false;
-}
-
-static inline void folio_set_idle(struct folio *folio)
-{
-}
-
-static inline void folio_clear_idle(struct folio *folio)
-{
-}
-
-#endif /* CONFIG_PAGE_IDLE_FLAG */
-
-static inline bool page_is_young(struct page *page)
-{
-	return folio_test_young(page_folio(page));
-}
-
-static inline void set_page_young(struct page *page)
-{
-	folio_set_young(page_folio(page));
-}
-
-static inline bool test_and_clear_page_young(struct page *page)
-{
-	return folio_test_clear_young(page_folio(page));
-}
-
-static inline bool page_is_idle(struct page *page)
-{
-	return folio_test_idle(page_folio(page));
-}
-
-static inline void set_page_idle(struct page *page)
-{
-	folio_set_idle(page_folio(page));
-}
+#endif /* CONFIG_PAGE_IDLE_FLAG && !64BIT */
 #endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 119a0c9d2a8b..debdc25f08b9 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -11,7 +11,8 @@ extern struct page_ext_operations page_owner_ops;
 extern void __reset_page_owner(struct page *page, unsigned short order);
 extern void __set_page_owner(struct page *page,
			unsigned short order, gfp_t gfp_mask);
-extern void __split_page_owner(struct page *page, unsigned int nr);
+extern void __split_page_owner(struct page *page, int old_order,
+			int new_order);
 extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
 extern void __dump_page_owner(const struct page *page);
@@ -31,10 +32,11 @@ static inline void set_page_owner(struct page *page,
		__set_page_owner(page, order, gfp_mask);
 }
 
-static inline void split_page_owner(struct page *page, unsigned int nr)
+static inline void split_page_owner(struct page *page, int old_order,
+		int new_order)
 {
	if (static_branch_unlikely(&page_owner_inited))
-		__split_page_owner(page, nr);
+		__split_page_owner(page, old_order, new_order);
 }
 static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
 {
@@ -56,11 +58,11 @@ static inline void reset_page_owner(struct page *page, unsigned short order)
 {
 }
 static inline void set_page_owner(struct page *page,
-			unsigned int order, gfp_t gfp_mask)
+			unsigned short order, gfp_t gfp_mask)
 {
 }
-static inline void split_page_owner(struct page *page,
-			unsigned short order)
+static inline void split_page_owner(struct page *page, int old_order,
+			int new_order)
 {
 }
 static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index d7c2d33baa7f..1acf5bac7f50 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -139,20 +139,15 @@ static inline void folio_ref_sub(struct folio *folio, int nr)
	page_ref_sub(&folio->page, nr);
 }
 
-static inline int page_ref_sub_return(struct page *page, int nr)
+static inline int folio_ref_sub_return(struct folio *folio, int nr)
 {
-	int ret = atomic_sub_return(nr, &page->_refcount);
+	int ret = atomic_sub_return(nr, &folio->_refcount);
 
	if (page_ref_tracepoint_active(page_ref_mod_and_return))
-		__page_ref_mod_and_return(page, -nr, ret);
+		__page_ref_mod_and_return(&folio->page, -nr, ret);
	return ret;
 }
 
-static inline int folio_ref_sub_return(struct folio *folio, int nr)
-{
-	return page_ref_sub_return(&folio->page, nr);
-}
-
 static inline void page_ref_inc(struct page *page)
 {
	atomic_inc(&page->_refcount);
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 3f2409b968ec..547e82cdc89a 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -28,7 +28,7 @@ enum pageblock_bits {
	NR_PAGEBLOCK_BITS
 };
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE)
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
@@ -45,7 +45,11 @@ extern unsigned int pageblock_order;
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-#else /* CONFIG_HUGETLB_PAGE */
+#elif defined(CONFIG_TRANSPARENT_HUGEPAGE)
+
+#define pageblock_order	min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER)
+
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
 #define pageblock_order	MAX_PAGE_ORDER
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..3d69589c00a4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -40,6 +40,8 @@ int filemap_fdatawait_keep_errors(struct address_space *mapping);
 int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
 int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
		loff_t start_byte, loff_t end_byte);
+int filemap_invalidate_inode(struct inode *inode, bool flush,
+			     loff_t start, loff_t end);
 
 static inline int filemap_fdatawait(struct address_space *mapping)
 {
@@ -542,24 +544,22 @@ static inline void *detach_page_private(struct page *page)
 #endif
 
 #ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
+struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
 #else
-static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
+static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 {
-	return folio_alloc(gfp, order);
+	return folio_alloc_noprof(gfp, order);
 }
 #endif
 
+#define filemap_alloc_folio(...)				\
+	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
+
 static inline struct page *__page_cache_alloc(gfp_t gfp)
 {
	return &filemap_alloc_folio(gfp, 0)->page;
 }
 
-static inline struct page *page_cache_alloc(struct address_space *x)
-{
-	return __page_cache_alloc(mapping_gfp_mask(x));
-}
-
 static inline gfp_t readahead_gfp_mask(struct address_space *x)
 {
	return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
@@ -1014,7 +1014,7 @@ static inline bool folio_trylock(struct folio *folio)
 /*
 * Return true if the page was successfully locked
 */
-static inline int trylock_page(struct page *page)
+static inline bool trylock_page(struct page *page)
 {
	return folio_trylock(page_folio(page));
 }
@@ -1144,11 +1144,6 @@ void folio_end_writeback(struct folio *folio);
 void wait_for_stable_page(struct page *page);
 void folio_wait_stable(struct folio *folio);
 void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
-static inline void __set_page_dirty(struct page *page,
-		struct address_space *mapping, int warn)
-{
-	__folio_mark_dirty(page_folio(page), mapping, warn);
-}
 void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
 void __folio_cancel_dirty(struct folio *folio);
 static inline void folio_cancel_dirty(struct folio *folio)
@@ -1160,7 +1155,6 @@ static inline void folio_cancel_dirty(struct folio *folio)
 bool folio_clear_dirty_for_io(struct folio *folio);
 bool clear_page_dirty_for_io(struct page *page);
 void folio_invalidate(struct folio *folio, size_t offset, size_t length);
-int __set_page_dirty_nobuffers(struct page *page);
 bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
 
 #ifdef CONFIG_MIGRATION
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 87cc678adc85..5d3a0cccc6bf 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -11,8 +11,8 @@
 
 #include <linux/types.h>
 
-/* 15 pointers + header align the folio_batch structure to a power of two */
-#define PAGEVEC_SIZE	15
+/* 31 pointers + header align the folio_batch structure to a power of two */
+#define PAGEVEC_SIZE	31
 
 struct folio;
 
@@ -27,6 +27,7 @@ struct folio;
 */
 struct folio_batch {
	unsigned char nr;
+	unsigned char i;
	bool percpu_pvec_drained;
	struct folio *folios[PAGEVEC_SIZE];
 };
@@ -40,12 +41,14 @@ struct folio_batch {
 static inline void folio_batch_init(struct folio_batch *fbatch)
 {
	fbatch->nr = 0;
+	fbatch->i = 0;
	fbatch->percpu_pvec_drained = false;
 }
 
 static inline void folio_batch_reinit(struct folio_batch *fbatch)
 {
	fbatch->nr = 0;
+	fbatch->i = 0;
 }
 
 static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
@@ -75,6 +78,21 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch,
	return folio_batch_space(fbatch);
 }
 
+/**
+ * folio_batch_next - Return the next folio to process.
+ * @fbatch: The folio batch being processed.
+ *
+ * Use this function to implement a queue of folios.
+ *
+ * Return: The next folio in the queue, or NULL if the queue is empty.
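+ *
+ * A typical consumer loop (illustrative only; process() is a stand-in):
+ *
+ *	struct folio *folio;
+ *
+ *	while ((folio = folio_batch_next(&fbatch)))
+ *		process(folio);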
+ */
+static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
+{
+	if (fbatch->i == fbatch->nr)
+		return NULL;
+	return fbatch->folios[fbatch->i++];
+}
+
 void __folio_batch_release(struct folio_batch *pvec);
 
 static inline void folio_batch_release(struct folio_batch *fbatch)
diff --git a/include/linux/papr_scm.h b/include/linux/papr_scm.h
new file mode 100644
index 000000000000..eb36453813db
--- /dev/null
+++ b/include/linux/papr_scm.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __LINUX_PAPR_SCM_H
+#define __LINUX_PAPR_SCM_H
+
+/* DIMM health bitmap indicators */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED		(1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_PMEM_SHUTDOWN_DIRTY	(1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_PMEM_SHUTDOWN_CLEAN	(1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_PMEM_EMPTY			(1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_PMEM_HEALTH_CRITICAL	(1ULL << (63 - 4))
+/* SCM device will be guarded off next IPL due to failure */
+#define PAPR_PMEM_HEALTH_FATAL		(1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_PMEM_HEALTH_UNHEALTHY	(1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL	(1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_PMEM_ENCRYPTED		(1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_PMEM_SCRUBBED_AND_LOCKED	(1ULL << (63 - 9))
+
+#define PAPR_PMEM_SAVE_FAILED		(1ULL << (63 - 10))
+
+/* Bit status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_PMEM_UNARMED_MASK		(PAPR_PMEM_UNARMED |		\
+					 PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* Bit status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_PMEM_BAD_SHUTDOWN_MASK	(PAPR_PMEM_SHUTDOWN_DIRTY)
+
+/* Bit status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_PMEM_BAD_RESTORE_MASK	(PAPR_PMEM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_PMEM_SMART_EVENT_MASK	(PAPR_PMEM_HEALTH_CRITICAL |	\
+					 PAPR_PMEM_HEALTH_FATAL |	\
+					 PAPR_PMEM_HEALTH_UNHEALTHY)
+
+#define PAPR_PMEM_SAVE_MASK		(PAPR_PMEM_SAVE_FAILED)
+
+#define PAPR_SCM_PERF_STATS_EYECATCHER	__stringify(SCMSTATS)
+#define PAPR_SCM_PERF_STATS_VERSION	0x1
+
+#endif /* __LINUX_PAPR_SCM_H */
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index abeba356bc3f..ac8c44dd8237 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -59,7 +59,7 @@ static inline void part_stat_set_all(struct block_device *part, int value)
 
 #define part_stat_add(part, field, addnd)	do {			\
	__part_stat_add((part), field, addnd);				\
-	if ((part)->bd_partno)						\
+	if (bdev_is_partition(part))					\
		__part_stat_add(bdev_whole(part), field, addnd);	\
 } while (0)
 
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 40ea18f5aa02..acc5f96161fe 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -128,6 +128,8 @@ struct pci_epc_mem {
 * @group: configfs group representing the PCI EPC device
 * @lock: mutex to protect pci_epc ops
 * @function_num_map: bitmap to manage physical function number
+ * @init_complete: flag to indicate whether the EPC initialization is complete
+ *		   or not
 */
 struct pci_epc {
	struct device			dev;
@@ -143,28 +145,53 @@ struct pci_epc {
	/* mutex to protect against concurrent access of EP controller */
	struct mutex			lock;
	unsigned long			function_num_map;
+	bool				init_complete;
+};
+
+/**
+ * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC.
+ * @BAR_FIXED: The BAR mask is fixed by the hardware.
+ * @BAR_RESERVED: The BAR should not be touched by an EPF driver.
+ */
+enum pci_epc_bar_type {
+	BAR_PROGRAMMABLE = 0,
+	BAR_FIXED,
+	BAR_RESERVED,
+};
+
+/**
+ * struct pci_epc_bar_desc - hardware description for a BAR
+ * @type: the type of the BAR
+ * @fixed_size: the fixed size, only applicable if type is BAR_FIXED.
+ * @only_64bit: if true, an EPF driver is not allowed to choose if this BAR
+ *		should be configured as 32-bit or 64-bit, the EPF driver must
+ *		configure this BAR as 64-bit. Additionally, the BAR succeeding
+ *		this BAR must be set to type BAR_RESERVED.
+ *
+ *		only_64bit should not be set on a BAR of type BAR_RESERVED.
+ *		(If BARx is a 64-bit BAR that an EPF driver is not allowed to
+ *		touch, then both BARx and BARx+1 must be set to type
+ *		BAR_RESERVED.)
+ */
+struct pci_epc_bar_desc {
+	enum pci_epc_bar_type type;
+	u64 fixed_size;
+	bool only_64bit;
 };
 
 /**
 * struct pci_epc_features - features supported by an EPC device per function
 * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up
- * @core_init_notifier: indicate cores that can notify about their availability
- *			for initialization
 * @msi_capable: indicate if the endpoint function has MSI capability
 * @msix_capable: indicate if the endpoint function has MSI-X capability
- * @reserved_bar: bitmap to indicate reserved BAR unavailable to function driver
- * @bar_fixed_64bit: bitmap to indicate fixed 64bit BARs
- * @bar_fixed_size: Array specifying the size supported by each BAR
+ * @bar: array specifying the hardware description for each BAR
 * @align: alignment size required for BAR buffer allocation
 */
 struct pci_epc_features {
	unsigned int	linkup_notifier : 1;
-	unsigned int	core_init_notifier : 1;
	unsigned int	msi_capable : 1;
	unsigned int	msix_capable : 1;
-	u8	reserved_bar;
-	u8	bar_fixed_64bit;
-	u64	bar_fixed_size[PCI_STD_NUM_BARS];
+	struct	pci_epc_bar_desc bar[PCI_STD_NUM_BARS];
	size_t	align;
 };
 
@@ -198,6 +225,7 @@ int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf,
 void pci_epc_linkup(struct pci_epc *epc);
 void pci_epc_linkdown(struct pci_epc *epc);
 void pci_epc_init_notify(struct pci_epc *epc);
+void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf);
 void pci_epc_bme_notify(struct pci_epc *epc);
 void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
			enum pci_epc_interface_type type);
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 77b146e0f672..adee6a1b35db 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -15,6 +15,7 @@
 #include <linux/pci.h>
 
 struct pci_epf;
+struct pci_epc_features;
 enum pci_epc_interface_type;
 
 enum pci_barno {
@@ -216,7 +217,8 @@ int __pci_epf_register_driver(struct pci_epf_driver *driver,
			      struct module *owner);
 void pci_epf_unregister_driver(struct pci_epf_driver *driver);
 void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
-			  size_t align, enum pci_epc_interface_type type);
+			  const struct pci_epc_features *epc_features,
+			  enum pci_epc_interface_type type);
 void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
			enum pci_epc_interface_type type);
 int pci_epf_bind(struct pci_epf *epf);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7ab0d13672da..fb004fd4e889 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -51,7 +51,7 @@
			       PCI_STATUS_PARITY)
 
 /* Number of reset methods used in pci_reset_fn_methods array in pci.c */
-#define PCI_NUM_RESET_METHODS 7
+#define PCI_NUM_RESET_METHODS 8
 
 #define PCI_RESET_PROBE		true
 #define PCI_RESET_DO_RESET	false
@@ -390,9 +390,9 @@ struct pci_dev {
	unsigned int	d3hot_delay;	/* D3hot->D0 transition time in ms */
	unsigned int	d3cold_delay;	/* D3cold->D0 transition time in ms */
 
+	u16		l1ss;		/* L1SS Capability pointer */
 #ifdef CONFIG_PCIEASPM
	struct pcie_link_state	*link_state;	/* ASPM link state */
-	u16		l1ss;		/* L1SS Capability pointer */
	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
					   supported from root to here */
 #endif
@@ -413,6 +413,8 @@ struct pci_dev {
	struct resource driver_exclusive_resource;	 /* driver exclusive resource ranges */
 
	bool		match_driver;		/* Skip attaching driver */
+	struct lock_class_key cfg_access_key;
+	struct lockdep_map cfg_access_lock;
 
	unsigned int	transparent:1;		/* Subtractive decode bridge */
	unsigned int	io_window:1;		/* Bridge has I/O window */
@@ -1077,8 +1079,6 @@ enum {
 #define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
 #define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
 
-#define PCI_IRQ_LEGACY		PCI_IRQ_INTX /* Deprecated! Use PCI_IRQ_INTX */
-
 /* These external functions are only available when PCI support is enabled */
 #ifdef CONFIG_PCI
 
@@ -1315,7 +1315,6 @@ int pci_user_write_config_word(struct pci_dev *dev, int where, u16 val);
 int pci_user_write_config_dword(struct pci_dev *dev, int where, u32 val);
 
 int __must_check pci_enable_device(struct pci_dev *dev);
-int __must_check pci_enable_device_io(struct pci_dev *dev);
 int __must_check pci_enable_device_mem(struct pci_dev *dev);
 int __must_check pci_reenable_device(struct pci_dev *);
 int __must_check pcim_enable_device(struct pci_dev *pdev);
@@ -1648,8 +1647,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 */
 #define PCI_IRQ_VIRTUAL		(1 << 4)
 
-#define PCI_IRQ_ALL_TYPES \
-	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
+#define PCI_IRQ_ALL_TYPES	(PCI_IRQ_INTX | PCI_IRQ_MSI | PCI_IRQ_MSIX)
 
 #include <linux/dmapool.h>
 
@@ -1658,8 +1656,6 @@ struct msix_entry {
	u16	entry;	/* Driver uses to specify entry, OS writes */
 };
 
-struct msi_domain_template;
-
 #ifdef CONFIG_PCI_MSI
 int pci_msi_vec_count(struct pci_dev *dev);
 void pci_disable_msi(struct pci_dev *dev);
@@ -1692,11 +1688,6 @@ void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map);
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
 const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
-bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template,
-			   unsigned int hwsize, void *data);
-struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie,
-				 const struct irq_affinity_desc *affdesc);
-void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map);
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
 
@@ -1719,7 +1710,7 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
			       unsigned int max_vecs, unsigned int flags,
			       struct irq_affinity *aff_desc)
 {
-	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
+	if ((flags & PCI_IRQ_INTX) && min_vecs == 1 && dev->irq)
		return 1;
	return -ENOSPC;
 }
@@ -1760,25 +1751,6 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
 {
	return cpu_possible_mask;
 }
-
-static inline bool pci_create_ims_domain(struct pci_dev *pdev,
-					 const struct msi_domain_template *template,
-					 unsigned int hwsize, void *data)
-{ return false; }
-
-static inline struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev,
-					       union msi_instance_cookie *icookie,
-					       const struct irq_affinity_desc *affdesc)
-{
-	struct msi_map map = { .index = -ENOSYS, };
-
-	return map;
-}
-
-static inline void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map)
-{
-}
-
 #endif
 
 /**
@@ -1821,17 +1793,21 @@ extern bool pcie_ports_native;
 #define pcie_ports_native	false
 #endif
 
-#define PCIE_LINK_STATE_L0S		BIT(0)
-#define PCIE_LINK_STATE_L1		BIT(1)
-#define PCIE_LINK_STATE_CLKPM		BIT(2)
-#define PCIE_LINK_STATE_L1_1		BIT(3)
-#define PCIE_LINK_STATE_L1_2		BIT(4)
-#define PCIE_LINK_STATE_L1_1_PCIPM	BIT(5)
-#define PCIE_LINK_STATE_L1_2_PCIPM	BIT(6)
-#define PCIE_LINK_STATE_ALL		(PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |\
-					 PCIE_LINK_STATE_CLKPM | PCIE_LINK_STATE_L1_1 |\
-					 PCIE_LINK_STATE_L1_2 | PCIE_LINK_STATE_L1_1_PCIPM |\
+#define PCIE_LINK_STATE_L0S		(BIT(0) | BIT(1)) /* Upstr/dwnstr L0s */
+#define PCIE_LINK_STATE_L1		BIT(2)	/* L1 state */
+#define PCIE_LINK_STATE_L1_1		BIT(3)	/* ASPM L1.1 state */
+#define PCIE_LINK_STATE_L1_2		BIT(4)	/* ASPM L1.2 state */
+#define PCIE_LINK_STATE_L1_1_PCIPM	BIT(5)	/* PCI-PM L1.1 state */
+#define PCIE_LINK_STATE_L1_2_PCIPM	BIT(6)	/* PCI-PM L1.2 state */
+#define PCIE_LINK_STATE_ASPM_ALL	(PCIE_LINK_STATE_L0S		|\
+					 PCIE_LINK_STATE_L1		|\
+					 PCIE_LINK_STATE_L1_1		|\
+					 PCIE_LINK_STATE_L1_2		|\
+					 PCIE_LINK_STATE_L1_1_PCIPM	|\
					 PCIE_LINK_STATE_L1_2_PCIPM)
+#define PCIE_LINK_STATE_CLKPM		BIT(7)
+#define PCIE_LINK_STATE_ALL		(PCIE_LINK_STATE_ASPM_ALL	|\
+					 PCIE_LINK_STATE_CLKPM)
 
 #ifdef CONFIG_PCIEASPM
 int pci_disable_link_state(struct pci_dev *pdev, int state);
@@ -2014,10 +1990,9 @@ static inline int pci_register_driver(struct pci_driver *drv)
 static inline void pci_unregister_driver(struct pci_driver *drv) { }
 static inline u8 pci_find_capability(struct pci_dev *dev, int cap)
 { return 0; }
-static inline int pci_find_next_capability(struct pci_dev *dev, u8 post,
-					   int cap)
+static inline u8 pci_find_next_capability(struct pci_dev *dev, u8 post, int cap)
 { return 0; }
-static inline int pci_find_ext_capability(struct pci_dev *dev, int cap)
+static inline u16 pci_find_ext_capability(struct pci_dev *dev, int cap)
 { return 0; }
 
 static inline u64 pci_get_dsn(struct pci_dev *dev)
@@ -2517,6 +2492,16 @@ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev)
	return NULL;
 }
 
+static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
+{
+	/*
+	 * error_state is set in pci_dev_set_io_state() using xchg/cmpxchg()
+	 * and read w/o common lock. READ_ONCE() ensures compiler cannot cache
+	 * the value (e.g. inside the loop in pci_dev_wait()).
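+	 *
+	 * Illustrative use (not from this patch) in a driver polling loop:
+	 *
+	 *	if (pci_dev_is_disconnected(pdev))
+	 *		return -ENODEV;	/* device is gone; stop waiting */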
+	 */
+	return READ_ONCE(dev->error_state) == pci_channel_io_perm_failure;
+}
+
 void pci_request_acs(void);
 bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
 bool pci_acs_path_enabled(struct pci_dev *start,
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a0c75e467df3..942a587bb97e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -580,6 +580,7 @@
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb
 #define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3
 #define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb
+#define PCI_DEVICE_ID_AMD_1AH_M70H_DF_F3 0x12bb
 #define PCI_DEVICE_ID_AMD_MI200_DF_F3	0x14d3
 #define PCI_DEVICE_ID_AMD_MI300_DF_F3	0x152b
 #define PCI_DEVICE_ID_AMD_VANGOGH_USB	0x163a
@@ -2607,6 +2608,8 @@
 
 #define PCI_VENDOR_ID_ALIBABA		0x1ded
 
+#define PCI_VENDOR_ID_CXL		0x1e98
+
 #define PCI_VENDOR_ID_TEHUTI		0x1fc9
 #define PCI_DEVICE_ID_TEHUTI_3009	0x3009
 #define PCI_DEVICE_ID_TEHUTI_3010	0x3010
@@ -2686,8 +2689,10 @@
 #define PCI_DEVICE_ID_INTEL_I960	0x0960
 #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
 #define PCI_DEVICE_ID_INTEL_HDA_HSW_0	0x0a0c
+#define PCI_DEVICE_ID_INTEL_DSA_SPR0	0x0b25
 #define PCI_DEVICE_ID_INTEL_HDA_HSW_2	0x0c0c
 #define PCI_DEVICE_ID_INTEL_CENTERTON_ILB	0x0c60
+#define PCI_DEVICE_ID_INTEL_IAX_SPR0	0x0cfe
 #define PCI_DEVICE_ID_INTEL_HDA_HSW_3	0x0d0c
 #define PCI_DEVICE_ID_INTEL_HDA_BYT	0x0f04
 #define PCI_DEVICE_ID_INTEL_SST_BYT	0x0f28
@@ -3106,6 +3111,7 @@
 #define PCI_DEVICE_ID_INTEL_HDA_CML_S	0xa3f0
 #define PCI_DEVICE_ID_INTEL_HDA_LNL_P	0xa828
 #define PCI_DEVICE_ID_INTEL_S21152BB	0xb152
+#define PCI_DEVICE_ID_INTEL_HDA_BMG	0xe2f7
 #define PCI_DEVICE_ID_INTEL_HDA_CML_R	0xf0c8
 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S	0xf1c8
 
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
index 30581e2e04cc..5802e1deef24 100644
--- a/include/linux/pds/pds_common.h
+++ b/include/linux/pds/pds_common.h
@@ -4,6 +4,8 @@
 #ifndef _PDS_COMMON_H_
 #define _PDS_COMMON_H_
 
+#include <linux/notifier.h>
+
 #define PDS_CORE_DRV_NAME	"pds_core"
 
 /* the device's internal addressing uses up to 52 bits */
diff --git a/include/linux/peci.h b/include/linux/peci.h
index 9b3d36aff431..90e241458ef6 100644
--- a/include/linux/peci.h
+++ b/include/linux/peci.h
@@ -58,7 +58,6 @@ static inline struct peci_controller *to_peci_controller(void *d)
 /**
 * struct peci_device - PECI device
 * @dev: device object to register PECI device to the device model
- * @controller: manages the bus segment hosting this PECI device
 * @info: PECI device characteristics
 * @info.family: device family
 * @info.model: device model
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 8c677f185901..03053de557cf 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
 
+#include <linux/alloc_tag.h>
 #include <linux/mmdebug.h>
 #include <linux/preempt.h>
 #include <linux/smp.h>
@@ -9,12 +10,17 @@
 #include <linux/pfn.h>
 #include <linux/init.h>
 #include <linux/cleanup.h>
+#include <linux/sched.h>
 
 #include <asm/percpu.h>
 
 /* enough to cover all DEFINE_PER_CPUs in modules */
 #ifdef CONFIG_MODULES
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+#define PERCPU_MODULE_RESERVE	(8 << 13)
+#else
 #define PERCPU_MODULE_RESERVE	(8 << 10)
+#endif
 #else
 #define PERCPU_MODULE_RESERVE	0
 #endif
@@ -121,7 +127,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
					pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn);
 #endif
 
-extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
 extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
 extern bool is_kernel_percpu_address(unsigned long addr);
 
@@ -129,14 +134,16 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 extern void __init setup_per_cpu_areas(void);
 #endif
 
-extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
-extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
-extern void free_percpu(void __percpu *__pdata);
+extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
+					gfp_t gfp) __alloc_size(1);
 extern size_t pcpu_alloc_size(void __percpu *__pdata);
 
-DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))
-
-extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
+#define __alloc_percpu_gfp(_size, _align, _gfp)				\
+	alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp))
+#define __alloc_percpu(_size, _align)					\
+	alloc_hooks(pcpu_alloc_noprof(_size, _align, false, GFP_KERNEL))
+#define __alloc_reserved_percpu(_size, _align)				\
+	alloc_hooks(pcpu_alloc_noprof(_size, _align, true, GFP_KERNEL))
 
 #define alloc_percpu_gfp(type, gfp)					\
	(typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type),	\
@@ -144,6 +151,15 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 #define alloc_percpu(type)						\
	(typeof(type) __percpu *)__alloc_percpu(sizeof(type),		\
						__alignof__(type))
+#define alloc_percpu_noprof(type)					\
+	((typeof(type) __percpu *)pcpu_alloc_noprof(sizeof(type),	\
+				__alignof__(type), false, GFP_KERNEL))
+
+extern void free_percpu(void __percpu *__pdata);
+
+DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T))
+
+extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 
 extern unsigned long pcpu_nr_pages(void);
 
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 43282e22ebe1..701974639ff2 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -39,6 +39,14 @@ struct cpu_hw_events {
	DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
	/* currently enabled firmware counters */
	DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
+	/* The virtual address of the shared memory where counter snapshot will be taken */
+	void *snapshot_addr;
+	/* The physical address of the shared memory where counter snapshot will be taken */
+	phys_addr_t snapshot_addr_phys;
+	/* Boolean flag to indicate setup is already done */
+	bool snapshot_set_done;
+	/* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
+	u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
 };
 
 struct riscv_pmu {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2a15c0c6f8a..a5304ae8c654 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -809,11 +809,8 @@ struct perf_event {
	u64				(*clock)(void);
	perf_overflow_handler_t		overflow_handler;
	void				*overflow_handler_context;
-#ifdef CONFIG_BPF_SYSCALL
-	perf_overflow_handler_t		orig_overflow_handler;
	struct bpf_prog			*prog;
	u64				bpf_cookie;
-#endif
 
 #ifdef CONFIG_EVENT_TRACING
	struct trace_event_call		*tp_event;
@@ -883,6 +880,7 @@ struct perf_event_pmu_context {
 
	unsigned int			nr_events;
	unsigned int			nr_cgroups;
+	unsigned int			nr_freq;
 
	atomic_t			refcount; /* event <-> epc */
	struct rcu_head			rcu_head;
@@ -897,6 +895,11 @@ struct perf_event_pmu_context {
	int				rotate_necessary;
 };
 
+static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
+{
+	return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
+}
+
 struct perf_event_groups {
	struct rb_root	tree;
	u64		index;
@@ -1342,8 +1345,10 @@ extern int perf_event_output(struct perf_event *event,
			     struct pt_regs *regs);
 
 static inline bool
-__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
+is_default_overflow_handler(struct perf_event *event)
 {
+	perf_overflow_handler_t overflow_handler = event->overflow_handler;
+
	if (likely(overflow_handler == perf_event_output_forward))
		return true;
	if (unlikely(overflow_handler == perf_event_output_backward))
@@ -1351,22 +1356,6 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
	return false;
 }
 
-#define is_default_overflow_handler(event) \
-	__is_default_overflow_handler((event)->overflow_handler)
-
-#ifdef CONFIG_BPF_SYSCALL
-static inline bool uses_default_overflow_handler(struct perf_event *event)
-{
-	if (likely(is_default_overflow_handler(event)))
-		return true;
-
-	return __is_default_overflow_handler(event->orig_overflow_handler);
-}
-#else
-#define uses_default_overflow_handler(event) \
-	is_default_overflow_handler(event)
-#endif
-
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
			   struct perf_sample_data *data,
@@ -1697,6 +1686,14 @@ perf_event_addr_filters(struct perf_event *event)
	return ifh;
 }
 
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+	/* Only the parent has fasync state */
+	if (event->parent)
+		event = event->parent;
+	return &event->fasync;
+}
+
 extern void perf_event_addr_filters_sync(struct perf_event *event);
 extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
 
diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
new file mode 100644
index 000000000000..86ba5d33e43b
--- /dev/null
+++ b/include/linux/pgalloc_tag.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * page allocation tagging
+ */
+#ifndef _LINUX_PGALLOC_TAG_H
+#define _LINUX_PGALLOC_TAG_H
+
+#include <linux/alloc_tag.h>
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+#include <linux/page_ext.h>
+
+extern struct page_ext_operations page_alloc_tagging_ops;
+
+static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext)
+{
+	return (void *)page_ext + page_alloc_tagging_ops.offset;
+}
+
+static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref)
+{
+	return (void *)ref - page_alloc_tagging_ops.offset;
+}
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+static inline union codetag_ref *get_page_tag_ref(struct page *page)
+{
+	if (page) {
+		struct page_ext *page_ext = page_ext_get(page);
+
+		if (page_ext)
+			return codetag_ref_from_page_ext(page_ext);
+	}
+	return NULL;
+}
+
+static inline void put_page_tag_ref(union codetag_ref *ref)
+{
+	page_ext_put(page_ext_from_codetag_ref(ref));
+}
+
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+				   unsigned int nr)
+{
+	if (mem_alloc_profiling_enabled()) {
+		union codetag_ref *ref = get_page_tag_ref(page);
+
+		if (ref) {
+			alloc_tag_add(ref, task->alloc_tag, PAGE_SIZE * nr);
+			put_page_tag_ref(ref);
+		}
+	}
+}
+
+static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
+{
+	if (mem_alloc_profiling_enabled()) {
+		union codetag_ref *ref = get_page_tag_ref(page);
+
+		if (ref) {
+			alloc_tag_sub(ref, PAGE_SIZE * nr);
+			put_page_tag_ref(ref);
+		}
+	}
+}
+
+static inline void pgalloc_tag_split(struct page *page, unsigned int nr)
+{
+	int i;
+	struct page_ext *page_ext;
+	union codetag_ref *ref;
+	struct alloc_tag *tag;
+
+	if (!mem_alloc_profiling_enabled())
+		return;
+
page_ext = page_ext_get(page); + if (unlikely(!page_ext)) + return; + + ref = codetag_ref_from_page_ext(page_ext); + if (!ref->ct) + goto out; + + tag = ct_to_alloc_tag(ref->ct); + page_ext = page_ext_next(page_ext); + for (i = 1; i < nr; i++) { + /* Set new reference to point to the original tag */ + alloc_tag_ref_set(codetag_ref_from_page_ext(page_ext), tag); + page_ext = page_ext_next(page_ext); + } +out: + page_ext_put(page_ext); +} + +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +{ + struct alloc_tag *tag = NULL; + + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + alloc_tag_sub_check(ref); + if (ref && ref->ct) + tag = ct_to_alloc_tag(ref->ct); + put_page_tag_ref(ref); + } + + return tag; +} + +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) +{ + if (mem_alloc_profiling_enabled() && tag) + this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); +} + +#else /* CONFIG_MEM_ALLOC_PROFILING */ + +static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } +static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, + unsigned int nr) {} +static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} +static inline void pgalloc_tag_split(struct page *page, unsigned int nr) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } +static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} + +#endif /* CONFIG_MEM_ALLOC_PROFILING */ + +#endif /* _LINUX_PGALLOC_TAG_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f6d0e3513948..18019f037bae 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -50,6 +50,8 @@ #define pmd_pgtable(pmd) pmd_page(pmd) #endif +#define pmd_folio(pmd) page_folio(pmd_page(pmd)) + /* * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD] * @@ -149,9 +151,7 @@ static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address) * a shortcut which implies the use of the kernel's pgd, instead * of a process's */ -#ifndef pgd_offset_k #define pgd_offset_k(address) pgd_offset(&init_mm, (address)) -#endif /* * In many cases it is known that a virtual address is mapped at PMD or PTE @@ -212,15 +212,37 @@ static inline int pmd_dirty(pmd_t pmd) #define arch_flush_lazy_mmu_mode() do {} while (0) #endif -#ifndef set_ptes +#ifndef pte_batch_hint +/** + * pte_batch_hint - Number of pages that can be added to batch without scanning. + * @ptep: Page table pointer for the entry. + * @pte: Page table entry. + * + * Some architectures know that a set of contiguous ptes all map the same + * contiguous memory with the same permissions. In this case, it can provide a + * hint to aid pte batching without the core code needing to scan every pte. + * + * An architecture implementation may ignore the PTE accessed state. Further, + * the dirty state must apply atomically to all the PTEs described by the hint. + * + * May be overridden by the architecture, else pte_batch_hint is always 1. 
+ */ +static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte) +{ + return 1; +} +#endif -#ifndef pte_next_pfn -static inline pte_t pte_next_pfn(pte_t pte) +#ifndef pte_advance_pfn +static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr) { - return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT)); + return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT)); } #endif +#define pte_next_pfn(pte) pte_advance_pfn(pte, 1) + +#ifndef set_ptes /** * set_ptes - Map consecutive pages to a contiguous range of addresses. * @mm: Address space to map the pages into. @@ -229,6 +251,10 @@ static inline pte_t pte_next_pfn(pte_t pte) * @pte: Page table entry for the first page. * @nr: Number of pages to map. * + * When nr==1, initial state of pte may be present or not present, and new state + * may be present or not present. When nr>1, initial state of all ptes must be + * not present, and new state must be present. + * * May be overridden by the architecture, or the architecture can define * set_pte() and PFN_PTE_SHIFT. * @@ -433,6 +459,50 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif +#ifndef clear_young_dirty_ptes +/** + * clear_young_dirty_ptes - Mark PTEs that map consecutive pages of the + * same folio as old/clean. + * @vma: The VMA the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to mark old/clean. + * @flags: Flags to modify the PTE batch semantics. + * + * May be overridden by the architecture; otherwise, implemented by + * get_and_clear/modify/set for each pte in the range. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + unsigned int nr, cydp_t flags) +{ + pte_t pte; + + for (;;) { + if (flags == CYDP_CLEAR_YOUNG) + ptep_test_and_clear_young(vma, addr, ptep); + else { + pte = ptep_get_and_clear(vma->vm_mm, addr, ptep); + if (flags & CYDP_CLEAR_YOUNG) + pte = pte_mkold(pte); + if (flags & CYDP_CLEAR_DIRTY) + pte = pte_mkclean(pte); + set_pte_at(vma->vm_mm, addr, ptep, pte); + } + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -580,6 +650,76 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, } #endif +#ifndef get_and_clear_full_ptes +/** + * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of + * the same folio, collecting dirty/accessed bits. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the + * returned PTE. + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD.
+ */ +static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr, int full) +{ + pte_t pte, tmp_pte; + + pte = ptep_get_and_clear_full(mm, addr, ptep, full); + while (--nr) { + ptep++; + addr += PAGE_SIZE; + tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full); + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } + return pte; +} +#endif + +#ifndef clear_full_ptes +/** + * clear_full_ptes - Clear present PTEs that map consecutive pages of the same + * folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_get_and_clear_full(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. + */ +static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + ptep_get_and_clear_full(mm, addr, ptep, full); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif /* * If two threads concurrently fault at the same page, the thread that @@ -612,6 +752,35 @@ static inline void pte_clear_not_present_full(struct mm_struct *mm, } #endif +#ifndef clear_not_present_full_ptes +/** + * clear_not_present_full_ptes - Clear multiple not present PTEs which are + * consecutive in the pgtable. + * @mm: Address space the ptes represent. + * @addr: Address of the first pte. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to clear. + * @full: Whether we are clearing a full mm. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over pte_clear_not_present_full(). + * + * Context: The caller holds the page table lock. The PTEs are all not present. + * The PTEs are all in the same PMD. + */ +static inline void clear_not_present_full_ptes(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, unsigned int nr, int full) +{ + for (;;) { + pte_clear_not_present_full(mm, addr, ptep, full); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH extern pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, @@ -650,6 +819,37 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres } #endif +#ifndef wrprotect_ptes +/** + * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same + * folio. + * @mm: Address space the pages are mapped into. + * @addr: Address the first page is mapped at. + * @ptep: Page table pointer for the first entry. + * @nr: Number of entries to write-protect. + * + * May be overridden by the architecture; otherwise, implemented as a simple + * loop over ptep_set_wrprotect(). + * + * Note that PTE bits in the PTE range besides the PFN can differ. For example, + * some PTEs might be write-protected. + * + * Context: The caller holds the page table lock. The PTEs map consecutive + * pages that belong to the same folio. The PTEs are all in the same PMD. 
+ */ +static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ + for (;;) { + ptep_set_wrprotect(mm, addr, ptep); + if (--nr == 0) + break; + ptep++; + addr += PAGE_SIZE; + } +} +#endif + /* * On some architectures hardware does not set page access bit when accessing * memory page, it is responsibility of software setting this bit. It brings @@ -925,7 +1125,7 @@ static inline int arch_unmap_one(struct mm_struct *mm, * prototypes must be defined in the arch-specific asm/pgtable.h file. */ #ifndef __HAVE_ARCH_PREPARE_TO_SWAP -static inline int arch_prepare_to_swap(struct page *page) +static inline int arch_prepare_to_swap(struct folio *folio) { return 0; } @@ -952,7 +1152,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) #endif #ifndef __HAVE_ARCH_MOVE_PTE -#define move_pte(pte, prot, old_addr, new_addr) (pte) +#define move_pte(pte, old_addr, new_addr) (pte) #endif #ifndef pte_accessible @@ -1643,23 +1843,37 @@ typedef unsigned int pgtbl_mod_mask; #endif /* - * p?d_leaf() - true if this entry is a final mapping to a physical address. - * This differs from p?d_huge() by the fact that they are always available (if - * the architecture supports large pages at the appropriate level) even - * if CONFIG_HUGETLB_PAGE is not defined. - * Only meaningful when called on a valid entry. + * pXd_leaf() is the API to check whether a pgtable entry is a huge page + * mapping. It should work globally across all archs, without any + * dependency on CONFIG_* options. For architectures that do not support + * huge mappings on specific levels, below fallbacks will be used. + * + * A leaf pgtable entry should always imply the following: + * + * - It is a "present" entry. IOW, before using this API, please check it + * with pXd_present() first. NOTE: it may not always mean the "present + * bit" is set. For example, PROT_NONE entries are always "present". + * + * - It should _never_ be a swap entry of any type. Above "present" check + * should have guarded this, but let's be crystal clear on this. + * + * - It should contain a huge PFN, which points to a huge page larger than + * PAGE_SIZE of the platform. The PFN format isn't important here. + * + * - It should cover all kinds of huge mappings (e.g., pXd_trans_huge(), + * pXd_devmap(), or hugetlb mappings). */ #ifndef pgd_leaf -#define pgd_leaf(x) 0 +#define pgd_leaf(x) false #endif #ifndef p4d_leaf -#define p4d_leaf(x) 0 +#define p4d_leaf(x) false #endif #ifndef pud_leaf -#define pud_leaf(x) 0 +#define pud_leaf(x) false #endif #ifndef pmd_leaf -#define pmd_leaf(x) 0 +#define pmd_leaf(x) false #endif #ifndef pgd_leaf_size @@ -1679,6 +1893,20 @@ typedef unsigned int pgtbl_mod_mask; #endif /* + * We always define pmd_pfn for all archs as it's used in lots of generic + * code. Now it happens too for pud_pfn (and can happen for larger + * mappings too in the future; we're not there yet). Instead of defining + * it for all archs (like pmd_pfn), provide a fallback. + * + * Note that returning 0 here means any arch that didn't define this can + * get severely wrong when it hits a real pud leaf. It's arch's + * responsibility to properly define it when a huge pud is possible. + */ +#ifndef pud_pfn +#define pud_pfn(x) 0 +#endif + +/* * Some architectures have MMUs that are configurable or selectable at boot * time. These lead to variable PTRS_PER_x. For statically allocated arrays it * helps to have a static maximum value. 
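A short illustration of how the batched PTE helpers added above fit together: the sketch below is not part of the commit, but shows a hypothetical count_contig_ptes() combining pte_batch_hint() with pte_advance_pfn() to find how many PTEs map consecutive PFNs. Real callers such as folio_pte_batch() additionally normalize the accessed/dirty bits before comparing, which this sketch omits.

#include <linux/pgtable.h>

/* Hypothetical sketch, not from this diff: count how many of the first
 * max_nr PTEs starting at start_ptep map consecutive PFNs, letting
 * pte_batch_hint() skip ahead where the architecture (e.g. arm64 contpte
 * mappings) can vouch for a whole batch at once.
 */
static unsigned int count_contig_ptes(pte_t *start_ptep, pte_t pte,
				      unsigned int max_nr)
{
	unsigned int nr = pte_batch_hint(start_ptep, pte);
	pte_t expected_pte = pte_advance_pfn(pte, nr);
	pte_t *ptep = start_ptep + nr;

	while (ptep < start_ptep + max_nr) {
		pte = ptep_get(ptep);
		if (!pte_same(pte, expected_pte))
			break;
		nr = pte_batch_hint(ptep, pte);
		expected_pte = pte_advance_pfn(expected_pte, nr);
		ptep += nr;
	}
	return min_t(unsigned int, ptep - start_ptep, max_nr);
}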
diff --git a/include/linux/phy.h b/include/linux/phy.h index 684efaeca07c..e6e83304558e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -30,6 +30,7 @@ #include <linux/refcount.h> #include <linux/atomic.h> +#include <net/eee.h> #define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ SUPPORTED_TP | \ @@ -54,6 +55,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; +extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features) #define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features) @@ -65,6 +67,7 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; #define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) #define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) +#define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; extern const int phy_fibre_port_array[1]; @@ -329,6 +332,7 @@ struct mdio_bus_stats { * struct phy_package_shared - Shared information in PHY packages * @base_addr: Base PHY address of PHY package used to combine PHYs * in one package and for offset calculation of phy_package_read/write + * @np: Pointer to the Device Node if PHY package defined in DT * @refcnt: Number of PHYs connected to this shared data * @flags: Initialization of PHY package * @priv_size: Size of the shared private data @priv @@ -340,6 +344,8 @@ struct mdio_bus_stats { */ struct phy_package_shared { u8 base_addr; + /* With PHY package defined in DT this points to the PHY package node */ + struct device_node *np; refcount_t refcnt; unsigned long flags; size_t priv_size; @@ -589,6 +595,8 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @eee_enabled: Flag indicating whether the EEE feature is enabled + * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -638,7 +646,7 @@ struct phy_device { /* Information about the PHY type */ /* And management functions */ - struct phy_driver *drv; + const struct phy_driver *drv; struct device_link *devlink; @@ -698,7 +706,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); /* used with phy_speed_down */ __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); - /* used for eee validation */ + /* used for eee validation and configuration*/ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); bool eee_enabled; @@ -708,6 +716,8 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ u32 eee_broken_modes; + bool enable_tx_lpi; + struct eee_config eee_cfg; #ifdef CONFIG_LED_TRIGGER_PHY struct phy_led_trigger *phy_led_triggers; @@ -768,6 +778,7 @@ struct phy_device { /* Generic phy_device::dev_flags */ #define PHY_F_NO_IRQ 0x80000000 +#define PHY_F_RXC_ALWAYS_ON 0x40000000 static inline struct phy_device 
*to_phy_device(const struct device *dev) { @@ -852,6 +863,15 @@ struct phy_plca_status { bool pst; }; +/* Modes for PHY LED configuration */ +enum phy_led_modes { + PHY_LED_ACTIVE_LOW = 0, + PHY_LED_INACTIVE_HIGH_IMPEDANCE = 1, + + /* keep it last */ + __PHY_LED_MODES_NUM, +}; + /** * struct phy_led: An LED driven by the PHY * @@ -1145,6 +1165,19 @@ struct phy_driver { int (*led_hw_control_get)(struct phy_device *dev, u8 index, unsigned long *rules); + /** + * @led_polarity_set: Set the LED polarity modes + * @dev: PHY device which has the LED + * @index: Which LED of the PHY device + * @modes: bitmap of LED polarity modes + * + * Configure LED with all the required polarity modes in @modes + * to make it correctly turn ON or OFF. + * + * Returns 0, or an error code. + */ + int (*led_polarity_set)(struct phy_device *dev, int index, + unsigned long modes); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) @@ -1851,7 +1884,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum, /* Clause 37 */ int genphy_c37_config_aneg(struct phy_device *phydev); -int genphy_c37_read_status(struct phy_device *phydev); +int genphy_c37_read_status(struct phy_device *phydev, bool *changed); /* Clause 45 PHY */ int genphy_c45_restart_aneg(struct phy_device *phydev); @@ -1886,9 +1919,9 @@ int genphy_c45_plca_get_status(struct phy_device *phydev, int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, unsigned long *lp, bool *is_enabled); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, - struct ethtool_eee *data); + struct ethtool_keee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); @@ -1938,8 +1971,10 @@ int phy_get_rate_matching(struct phy_device *phydev, void phy_set_max_speed(struct phy_device *phydev, u32 max_speed); void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode); void phy_advertise_supported(struct phy_device *phydev); +void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); +void phy_support_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); @@ -1966,8 +2001,8 @@ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); -int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); -int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data); +int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); +int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int phy_ethtool_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); void phy_ethtool_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); @@ -1977,9 +2012,12 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); int phy_package_join(struct phy_device *phydev, int base_addr, size_t 
priv_size); +int of_phy_package_join(struct phy_device *phydev, size_t priv_size); void phy_package_leave(struct phy_device *phydev); int devm_phy_package_join(struct device *dev, struct phy_device *phydev, int base_addr, size_t priv_size); +int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, + size_t priv_size); int __init mdio_bus_init(void); void mdio_bus_exit(void); @@ -2100,7 +2138,7 @@ static inline bool phy_package_probe_once(struct phy_device *phydev) return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); } -extern struct bus_type mdio_bus_type; +extern const struct bus_type mdio_bus_type; struct mdio_board_info { const char *bus_id; diff --git a/include/linux/phy/phy-dp.h b/include/linux/phy/phy-dp.h index 18cad23642cd..9cce5766bc0b 100644 --- a/include/linux/phy/phy-dp.h +++ b/include/linux/phy/phy-dp.h @@ -8,6 +8,9 @@ #include <linux/types.h> +#define PHY_SUBMODE_DP 0 +#define PHY_SUBMODE_EDP 1 + /** * struct phy_configure_opts_dp - DisplayPort PHY configuration set * diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index f6d607ef0e80..03cd5bae92d3 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -122,6 +122,11 @@ struct phy_ops { union phy_configure_opts *opts); int (*reset)(struct phy *phy); int (*calibrate)(struct phy *phy); + + /* notify phy connect status change */ + int (*connect)(struct phy *phy, int port); + int (*disconnect)(struct phy *phy, int port); + void (*release)(struct phy *phy); struct module *owner; }; @@ -176,7 +181,7 @@ struct phy_provider { struct module *owner; struct list_head list; struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); }; /** @@ -243,6 +248,8 @@ static inline enum phy_mode phy_get_mode(struct phy *phy) } int phy_reset(struct phy *phy); int phy_calibrate(struct phy *phy); +int phy_notify_connect(struct phy *phy, int port); +int phy_notify_disconnect(struct phy *phy, int port); static inline int phy_get_bus_width(struct phy *phy) { return phy->attrs.bus_width; @@ -265,7 +272,7 @@ void phy_put(struct device *dev, struct phy *phy); void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_get(struct device_node *np, const char *con_id); struct phy *of_phy_simple_xlate(struct device *dev, - struct of_phandle_args *args); + const struct of_phandle_args *args); struct phy *phy_create(struct device *dev, struct device_node *node, const struct phy_ops *ops); struct phy *devm_phy_create(struct device *dev, struct device_node *node, @@ -275,11 +282,11 @@ void devm_phy_destroy(struct device *dev, struct phy *phy); struct phy_provider *__of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); struct phy_provider *__devm_of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)); + const struct of_phandle_args *args)); void of_phy_provider_unregister(struct phy_provider *phy_provider); void devm_of_phy_provider_unregister(struct device *dev, struct phy_provider *phy_provider); @@ -396,6 +403,20 @@ static inline int phy_calibrate(struct phy *phy) return -ENOSYS; } +static inline int phy_notify_connect(struct phy *phy, int index) +{ + if (!phy) + return 0; + return -ENOSYS; +} + +static inline int phy_notify_disconnect(struct phy *phy, 
int index) +{ + if (!phy) + return 0; + return -ENOSYS; +} + static inline int phy_configure(struct phy *phy, union phy_configure_opts *opts) { @@ -479,7 +500,7 @@ static inline struct phy *of_phy_get(struct device_node *np, const char *con_id) } static inline struct phy *of_phy_simple_xlate(struct device *dev, - struct of_phandle_args *args) + const struct of_phandle_args *args) { return ERR_PTR(-ENOSYS); } @@ -509,7 +530,7 @@ static inline void devm_phy_destroy(struct device *dev, struct phy *phy) static inline struct phy_provider *__of_phy_provider_register( struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } @@ -517,7 +538,7 @@ static inline struct phy_provider *__of_phy_provider_register( static inline struct phy_provider *__devm_of_phy_provider_register(struct device *dev, struct device_node *children, struct module *owner, struct phy * (*of_xlate)(struct device *dev, - struct of_phandle_args *args)) + const struct of_phandle_args *args)) { return ERR_PTR(-ENOSYS); } diff --git a/include/linux/phy/tegra/xusb.h b/include/linux/phy/tegra/xusb.h index 70998e6dd6fd..6ca51e0080ec 100644 --- a/include/linux/phy/tegra/xusb.h +++ b/include/linux/phy/tegra/xusb.h @@ -26,6 +26,7 @@ void tegra_phy_xusb_utmi_pad_power_down(struct phy *phy); int tegra_phy_xusb_utmi_port_reset(struct phy *phy); int tegra_xusb_padctl_get_usb3_companion(struct tegra_xusb_padctl *padctl, unsigned int port); +int tegra_xusb_padctl_get_port_number(struct phy *phy); int tegra_xusb_padctl_enable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy, enum usb_device_speed speed); int tegra_xusb_padctl_disable_phy_sleepwalk(struct tegra_xusb_padctl *padctl, struct phy *phy); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index d589f89c612c..5ea6b2ad2396 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -138,6 +138,9 @@ enum phylink_op_type { * @poll_fixed_state: if true, starts link_poll, * if MAC link is at %MLO_AN_FIXED mode. * @mac_managed_pm: if true, indicate the MAC driver is responsible for PHY PM. + * @mac_requires_rxc: if true, the MAC always requires a receive clock from PHY. + * The PHY driver should start the clock signal as soon as + * possible and avoid stopping it during suspend events. * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. @@ -150,6 +153,7 @@ struct phylink_config { enum phylink_op_type type; bool poll_fixed_state; bool mac_managed_pm; + bool mac_requires_rxc; bool ovr_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); @@ -392,6 +396,10 @@ struct phylink_pcs_ops; * @phylink: pointer to &struct phylink_config * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes + * @rxc_always_on: The MAC driver requires the reference clock + * to always be on. Standalone PCS drivers which + * do not have access to a PHY device can check + * this instead of PHY_F_RXC_ALWAYS_ON. * * This structure is designed to be embedded within the PCS private data, * and will be passed between phylink and the PCS. 
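The mac_requires_rxc flag and the PCS pre-init plumbing documented above are meant to be used together. A hedged sketch of a MAC driver's hardware-init path follows; struct foo_priv and foo_dma_reset() are hypothetical, while phylink_pcs_pre_init() and the config flag come from this change set.

#include <linux/phylink.h>

struct foo_priv {				/* hypothetical driver state */
	struct phylink *phylink;
	struct phylink_pcs *pcs;
	struct phylink_config phylink_config;
};

static int foo_dma_reset(struct foo_priv *priv);	/* hypothetical */

static int foo_mac_hw_init(struct foo_priv *priv)
{
	int ret;

	/* priv->phylink_config.mac_requires_rxc was set to true before
	 * phylink_create(); here the MAC asks the PCS to start REF_CLK
	 * before its own reset/DMA registers are touched.
	 */
	ret = phylink_pcs_pre_init(priv->phylink, priv->pcs);
	if (ret)
		return ret;

	return foo_dma_reset(priv);
}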
@@ -404,6 +412,7 @@ struct phylink_pcs { struct phylink *phylink; bool neg_mode; bool poll; + bool rxc_always_on; }; /** @@ -418,6 +427,8 @@ struct phylink_pcs { * @pcs_an_restart: restart 802.3z BaseX autonegotiation. * @pcs_link_up: program the PCS for the resolved link configuration * (where necessary). + * @pcs_pre_init: configure PCS components necessary for MAC hardware + * initialization e.g. RX clock for stmmac. */ struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, @@ -437,6 +448,7 @@ struct phylink_pcs_ops { void (*pcs_an_restart)(struct phylink_pcs *pcs); void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + int (*pcs_pre_init)(struct phylink_pcs *pcs); }; #if 0 /* For kernel-doc purposes only. */ @@ -480,9 +492,6 @@ void pcs_disable(struct phylink_pcs *pcs); * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. - * - * When present, this overrides pcs_get_state() in &struct - * phylink_pcs_ops. */ void pcs_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state); @@ -545,6 +554,34 @@ void pcs_an_restart(struct phylink_pcs *pcs); */ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + +/** + * pcs_pre_init() - Configure PCS components necessary for MAC initialization + * @pcs: a pointer to a &struct phylink_pcs. + * + * This function can be called by MAC drivers through the + * phylink_pcs_pre_init() wrapper, before their hardware is initialized. It + * should not be called after the link is brought up, as reconfiguring the PCS + * at this point could break the link. + * + * Some MAC devices require specific hardware initialization to be performed by + * their associated PCS device before they can properly initialize their own + * hardware. An example of this is the initialization of stmmac controllers, + * which requires an active REF_CLK signal to be provided by the PHY/PCS. + * + * By calling phylink_pcs_pre_init(), MAC drivers can ensure that the PCS is + * setup in a way that allows for successful hardware initialization. + * + * The specific configuration performed by pcs_pre_init() is dependent on the + * model of PCS and the requirements of the MAC device attached to it. PCS + * driver authors should consider whether their target device is to be used in + * conjunction with a MAC device whose driver calls phylink_pcs_pre_init(). MAC + * driver authors should document their requirements for the PCS + * pre-initialization. 
+ * + */ +int pcs_pre_init(struct phylink_pcs *pcs); + #endif struct phylink *phylink_create(struct phylink_config *, @@ -564,6 +601,8 @@ void phylink_disconnect_phy(struct phylink *); void phylink_mac_change(struct phylink *, bool up); void phylink_pcs_change(struct phylink_pcs *, bool up); +int phylink_pcs_pre_init(struct phylink *pl, struct phylink_pcs *pcs); + void phylink_start(struct phylink *); void phylink_stop(struct phylink *); @@ -584,8 +623,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); int phylink_init_eee(struct phylink *, bool); -int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *); -int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *); +int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); +int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); int phylink_speed_down(struct phylink *pl, bool sync); int phylink_speed_up(struct phylink *pl); diff --git a/include/linux/pid.h b/include/linux/pid.h index 395cacce1179..a3aad9b4074c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -45,6 +45,8 @@ * find_pid_ns() using the int nr and struct pid_namespace *ns. */ +#define RESERVED_PIDS 300 + struct upid { int nr; struct pid_namespace *ns; @@ -55,6 +57,8 @@ struct pid refcount_t count; unsigned int level; spinlock_t lock; + struct dentry *stashed; + u64 ino; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -66,15 +70,13 @@ struct pid extern struct pid init_struct_pid; -extern const struct file_operations pidfd_fops; - struct file; -extern struct pid *pidfd_pid(const struct file *file); +struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_create(struct pid *pid, unsigned int flags); int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h new file mode 100644 index 000000000000..75bdf9807802 --- /dev/null +++ b/include/linux/pidfs.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PID_FS_H +#define _LINUX_PID_FS_H + +struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); +void __init pidfs_init(void); + +#endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 79594aeb160d..2f1b952d596a 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -154,9 +154,9 @@ struct packet_stacked_data struct pktcdvd_device { - struct bdev_handle *bdev_handle; /* dev attached */ + struct file *bdev_file; /* dev attached */ /* handle acquired for bdev during pkt_open_dev() */ - struct bdev_handle *open_bdev_handle; + struct file *f_open_bdev; dev_t pkt_dev; /* our dev */ struct packet_settings settings; struct packet_stats stats; diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h index f922a192fe58..ec99b7b73d1d 100644 --- a/include/linux/platform_data/brcmfmac.h +++ b/include/linux/platform_data/brcmfmac.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 201 Broadcom Corporation + * Copyright (c) 2016 Broadcom Corporation * * Permission to use, copy, modify, and/or distribute this 
software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 7dae17b62a4d..ecc47d5fe239 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3961,60 +3961,52 @@ struct ec_response_i2c_passthru { } __ec_align1; /*****************************************************************************/ -/* Power button hang detect */ - +/* AP hang detect */ #define EC_CMD_HANG_DETECT 0x009F -/* Reasons to start hang detection timer */ -/* Power button pressed */ -#define EC_HANG_START_ON_POWER_PRESS BIT(0) - -/* Lid closed */ -#define EC_HANG_START_ON_LID_CLOSE BIT(1) - - /* Lid opened */ -#define EC_HANG_START_ON_LID_OPEN BIT(2) - -/* Start of AP S3->S0 transition (booting or resuming from suspend) */ -#define EC_HANG_START_ON_RESUME BIT(3) - -/* Reasons to cancel hang detection */ +#define EC_HANG_DETECT_MIN_TIMEOUT 5 +#define EC_HANG_DETECT_MAX_TIMEOUT 65535 -/* Power button released */ -#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8) +/* EC hang detect commands */ +enum ec_hang_detect_cmds { + /* Reload AP hang detect timer. */ + EC_HANG_DETECT_CMD_RELOAD = 0x0, -/* Any host command from AP received */ -#define EC_HANG_STOP_ON_HOST_COMMAND BIT(9) + /* Stop AP hang detect timer. */ + EC_HANG_DETECT_CMD_CANCEL = 0x1, -/* Stop on end of AP S0->S3 transition (suspending or shutting down) */ -#define EC_HANG_STOP_ON_SUSPEND BIT(10) + /* Configure watchdog with given reboot timeout and + * cancel currently running AP hang detect timer. + */ + EC_HANG_DETECT_CMD_SET_TIMEOUT = 0x2, -/* - * If this flag is set, all the other fields are ignored, and the hang detect - * timer is started. This provides the AP a way to start the hang timer - * without reconfiguring any of the other hang detect settings. Note that - * you must previously have configured the timeouts. - */ -#define EC_HANG_START_NOW BIT(30) + /* Get last hang status - whether the AP boot was clear or not */ + EC_HANG_DETECT_CMD_GET_STATUS = 0x3, -/* - * If this flag is set, all the other fields are ignored (including - * EC_HANG_START_NOW). This provides the AP a way to stop the hang timer - * without reconfiguring any of the other hang detect settings. - */ -#define EC_HANG_STOP_NOW BIT(31) + /* Clear last hang status. Called when AP is rebooting/shutting down + * gracefully. 
+ */ + EC_HANG_DETECT_CMD_CLEAR_STATUS = 0x4 +}; struct ec_params_hang_detect { - /* Flags; see EC_HANG_* */ - uint32_t flags; - - /* Timeout in msec before generating host event, if enabled */ - uint16_t host_event_timeout_msec; + uint16_t command; /* enum ec_hang_detect_cmds */ + /* Timeout in seconds before generating reboot */ + uint16_t reboot_timeout_sec; +} __ec_align2; - /* Timeout in msec before generating warm reboot, if enabled */ - uint16_t warm_reboot_timeout_msec; -} __ec_align4; +/* Status codes that describe whether AP has boot normally or the hang has been + * detected and EC has reset AP + */ +enum ec_hang_detect_status { + EC_HANG_DETECT_AP_BOOT_NORMAL = 0x0, + EC_HANG_DETECT_AP_BOOT_EC_WDT = 0x1, + EC_HANG_DETECT_AP_BOOT_COUNT, +}; +struct ec_response_hang_detect { + uint8_t status; /* enum ec_hang_detect_status */ +} __ec_align1; /*****************************************************************************/ /* Commands for battery charging */ diff --git a/include/linux/platform_data/davinci_asp.h b/include/linux/platform_data/davinci_asp.h index c8645b2ed3c0..b9c8520b4bd3 100644 --- a/include/linux/platform_data/davinci_asp.h +++ b/include/linux/platform_data/davinci_asp.h @@ -26,16 +26,6 @@ struct davinci_mcasp_pdata { struct gen_pool *sram_pool; /* - * If McBSP peripheral gets the clock from an external pin, - * there are three chooses, that are MCBSP_CLKX, MCBSP_CLKR - * and MCBSP_CLKS. - * Depending on different hardware connections it is possible - * to use this setting to change the behaviour of McBSP - * driver. - */ - int clk_input_pin; - - /* * This flag works when both clock and FS are outputs for the cpu * and makes clock more accurate (FS is not symmetrical and the * clock is very fast. @@ -91,11 +81,6 @@ enum { MCASP_VERSION_OMAP, /* OMAP4/5 */ }; -enum mcbsp_clk_input_pin { - MCBSP_CLKR = 0, /* as in DM365 */ - MCBSP_CLKS, -}; - #define INACTIVE_MODE 0 #define TX_MODE 1 #define RX_MODE 2 diff --git a/include/linux/platform_data/i2c-mux-gpio.h b/include/linux/platform_data/i2c-mux-gpio.h index 5e4c2c272a73..816a4cd3ccb5 100644 --- a/include/linux/platform_data/i2c-mux-gpio.h +++ b/include/linux/platform_data/i2c-mux-gpio.h @@ -18,7 +18,6 @@ * @values: Array of bitmasks of GPIO settings (low/high) for each * position * @n_values: Number of multiplexer positions (busses to instantiate) - * @classes: Optional I2C auto-detection classes * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used */ struct i2c_mux_gpio_platform_data { @@ -26,7 +25,6 @@ struct i2c_mux_gpio_platform_data { int base_nr; const unsigned *values; int n_values; - const unsigned *classes; unsigned idle; }; diff --git a/include/linux/platform_data/mdio-bcm-unimac.h b/include/linux/platform_data/mdio-bcm-unimac.h index 8a5f9f0b2c52..724e1f57b81f 100644 --- a/include/linux/platform_data/mdio-bcm-unimac.h +++ b/include/linux/platform_data/mdio-bcm-unimac.h @@ -1,11 +1,14 @@ #ifndef __MDIO_BCM_UNIMAC_PDATA_H #define __MDIO_BCM_UNIMAC_PDATA_H +struct clk; + struct unimac_mdio_pdata { u32 phy_mask; int (*wait_func)(void *data); void *wait_func_data; const char *bus_name; + struct clk *clk; }; #define UNIMAC_MDIO_DRV_NAME "unimac-mdio" diff --git a/include/linux/platform_data/microchip-ksz.h b/include/linux/platform_data/microchip-ksz.h index f177416635a2..8c659db4da6b 100644 --- a/include/linux/platform_data/microchip-ksz.h +++ b/include/linux/platform_data/microchip-ksz.h @@ -33,6 +33,7 @@ enum ksz_chip_id { KSZ9897_CHIP_ID = 0x00989700, KSZ9893_CHIP_ID = 0x00989300, 
KSZ9563_CHIP_ID = 0x00956300, + KSZ8567_CHIP_ID = 0x00856700, KSZ9567_CHIP_ID = 0x00956700, LAN9370_CHIP_ID = 0x00937000, LAN9371_CHIP_ID = 0x00937100, diff --git a/include/linux/platform_data/net-cw1200.h b/include/linux/platform_data/net-cw1200.h index c510734405bb..89d0ec6f7d46 100644 --- a/include/linux/platform_data/net-cw1200.h +++ b/include/linux/platform_data/net-cw1200.h @@ -14,8 +14,6 @@ struct cw1200_platform_data_spi { /* All others are optional */ bool have_5ghz; - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int (*power_ctrl)(const struct cw1200_platform_data_spi *pdata, bool enable); /* Control 3v3 / 1v8 supply */ int (*clk_ctrl)(const struct cw1200_platform_data_spi *pdata, @@ -30,8 +28,6 @@ struct cw1200_platform_data_sdio { /* All others are optional */ bool have_5ghz; bool no_nptb; /* SDIO hardware does not support non-power-of-2-blocksizes */ - int reset; /* GPIO to RSTn signal (0 disables) */ - int powerup; /* GPIO to POWERUP signal (0 disables) */ int irq; /* IRQ line or 0 to use SDIO IRQ */ int (*power_ctrl)(const struct cw1200_platform_data_sdio *pdata, bool enable); /* Control 3v3 / 1v8 supply */ diff --git a/include/linux/platform_data/omap1_bl.h b/include/linux/platform_data/omap1_bl.h index 5e8b17d77a5f..3d0bab31a0a9 100644 --- a/include/linux/platform_data/omap1_bl.h +++ b/include/linux/platform_data/omap1_bl.h @@ -6,7 +6,6 @@ struct omap_backlight_config { int default_intensity; - int (*set_power)(struct device *dev, int state); }; #endif diff --git a/include/linux/platform_data/spi-omap2-mcspi.h b/include/linux/platform_data/spi-omap2-mcspi.h index 3b400b1919a9..9e3c15b4ac91 100644 --- a/include/linux/platform_data/spi-omap2-mcspi.h +++ b/include/linux/platform_data/spi-omap2-mcspi.h @@ -16,9 +16,6 @@ struct omap2_mcspi_platform_config { struct omap2_mcspi_device_config { unsigned turbo_mode:1; - - /* toggle chip select after every word */ - unsigned cs_per_word:1; }; #endif diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index eb556f988d57..d8f15770a522 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -71,7 +71,6 @@ struct sysc_regbits { #define SYSC_QUIRK_SWSUP_SIDLE_ACT BIT(12) #define SYSC_QUIRK_SWSUP_SIDLE BIT(11) #define SYSC_QUIRK_EXT_OPT_CLOCK BIT(10) -#define SYSC_QUIRK_LEGACY_IDLE BIT(9) #define SYSC_QUIRK_RESET_STATUS BIT(8) #define SYSC_QUIRK_NO_IDLE BIT(7) #define SYSC_QUIRK_NO_IDLE_ON_INIT BIT(6) diff --git a/include/linux/platform_data/uio_pruss.h b/include/linux/platform_data/uio_pruss.h deleted file mode 100644 index f76fa393b802..000000000000 --- a/include/linux/platform_data/uio_pruss.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * include/linux/platform_data/uio_pruss.h - * - * Platform data for uio_pruss driver - * - * Copyright (C) 2010-11 Texas Instruments Incorporated - https://www.ti.com/ - */ - -#ifndef _UIO_PRUSS_H_ -#define _UIO_PRUSS_H_ - -/* To configure the PRUSS INTC base offset for UIO driver */ -struct uio_pruss_pdata { - u32 pintc_base; - struct gen_pool *sram_pool; -}; -#endif /* _UIO_PRUSS_H_ */ diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index ab1c7deff118..3eb5cd6773ad 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -71,6 +71,7 @@ #define ASUS_WMI_DEVID_LID_FLIP 0x00060062 #define ASUS_WMI_DEVID_LID_FLIP_ROG 
0x00060077 #define ASUS_WMI_DEVID_MINI_LED_MODE 0x0005001E +#define ASUS_WMI_DEVID_MINI_LED_MODE2 0x0005002E /* Storage */ #define ASUS_WMI_DEVID_CARDREADER 0x00080013 @@ -127,13 +128,18 @@ /* gpu mux switch, 0 = dGPU, 1 = Optimus */ #define ASUS_WMI_DEVID_GPU_MUX 0x00090016 +#define ASUS_WMI_DEVID_GPU_MUX_VIVO 0x00090026 /* TUF laptop RGB modes/colours */ #define ASUS_WMI_DEVID_TUF_RGB_MODE 0x00100056 +#define ASUS_WMI_DEVID_TUF_RGB_MODE2 0x0010005A /* TUF laptop RGB power/state */ #define ASUS_WMI_DEVID_TUF_RGB_STATE 0x00100057 +/* Bootup sound control */ +#define ASUS_WMI_DEVID_BOOT_SOUND 0x00130022 + /* DSTS masks */ #define ASUS_WMI_DSTS_STATUS_BIT 0x00000001 #define ASUS_WMI_DSTS_UNKNOWN_BIT 0x00000002 diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h index b8a701c77fd0..161e4bc1c9ee 100644 --- a/include/linux/platform_data/x86/pmc_atom.h +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -43,6 +43,19 @@ BIT_ORED_DEDICATED_IRQ_GPSC | \ BIT_SHARED_IRQ_GPSS) +/* External clk generator settings */ +#define PMC_CLK_CTL_OFFSET 0x60 +#define PMC_CLK_CTL_SIZE 4 +#define PMC_CLK_NUM 6 +#define PMC_CLK_CTL_GATED_ON_D3 0x0 +#define PMC_CLK_CTL_FORCE_ON 0x1 +#define PMC_CLK_CTL_FORCE_OFF 0x2 +#define PMC_CLK_CTL_RESERVED 0x3 +#define PMC_MASK_CLK_CTL GENMASK(1, 0) +#define PMC_MASK_CLK_FREQ BIT(2) +#define PMC_CLK_FREQ_XTAL (0 << 2) /* 25 MHz */ +#define PMC_CLK_FREQ_PLL (1 << 2) /* 19.2 MHz */ + /* The timers accumulate time spent in sleep state */ #define PMC_S0IR_TMR 0x80 #define PMC_S0I1_TMR 0x84 @@ -104,14 +117,14 @@ #define BIT_SCC_SDIO BIT(9) #define BIT_SCC_SDCARD BIT(10) #define BIT_SCC_MIPI BIT(11) -#define BIT_HDA BIT(12) +#define BIT_HDA BIT(12) /* CHT datasheet: reserved */ #define BIT_LPE BIT(13) #define BIT_OTG BIT(14) -#define BIT_USH BIT(15) -#define BIT_GBE BIT(16) -#define BIT_SATA BIT(17) -#define BIT_USB_EHCI BIT(18) -#define BIT_SEC BIT(19) +#define BIT_USH BIT(15) /* CHT datasheet: reserved */ +#define BIT_GBE BIT(16) /* CHT datasheet: reserved */ +#define BIT_SATA BIT(17) /* CHT datasheet: reserved */ +#define BIT_USB_EHCI BIT(18) /* CHT datasheet: XHCI! 
*/ +#define BIT_SEC BIT(19) /* BYT datasheet: reserved */ #define BIT_PCIE_PORT0 BIT(20) #define BIT_PCIE_PORT1 BIT(21) #define BIT_PCIE_PORT2 BIT(22) diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h index c852fe24fe2a..752c06b47cc8 100644 --- a/include/linux/platform_data/x86/pwm-lpss.h +++ b/include/linux/platform_data/x86/pwm-lpss.h @@ -27,7 +27,7 @@ struct pwm_lpss_boardinfo { bool other_devices_aml_touches_pwm_regs; }; -struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, - const struct pwm_lpss_boardinfo *info); +struct pwm_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, + const struct pwm_lpss_boardinfo *info); #endif /* __PLATFORM_DATA_X86_PWM_LPSS_H */ diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index e5cbb6841f3a..f5492ed413f3 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -36,6 +36,7 @@ struct platform_profile_handler { int platform_profile_register(struct platform_profile_handler *pprof); int platform_profile_remove(void); +int platform_profile_cycle(void); void platform_profile_notify(void); #endif /*_PLATFORM_PROFILE_H_*/ diff --git a/include/linux/pm.h b/include/linux/pm.h index a2f3e53a8196..97b0e23363c8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -662,8 +662,8 @@ struct pm_subsys_data { struct dev_pm_info { pm_message_t power_state; - unsigned int can_wakeup:1; - unsigned int async_suspend:1; + bool can_wakeup:1; + bool async_suspend:1; bool in_dpm_list:1; /* Owned by the PM core */ bool is_prepared:1; /* Owned by the PM core */ bool is_suspended:1; /* Ditto */ @@ -682,10 +682,10 @@ struct dev_pm_info { bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ bool async_in_progress:1; /* Owned by the PM core */ - unsigned int must_resume:1; /* Owned by the PM core */ - unsigned int may_skip_resume:1; /* Set by subsystems */ + bool must_resume:1; /* Owned by the PM core */ + bool may_skip_resume:1; /* Set by subsystems */ #else - unsigned int should_wakeup:1; + bool should_wakeup:1; #endif #ifdef CONFIG_PM struct hrtimer suspend_timer; @@ -696,17 +696,17 @@ struct dev_pm_info { atomic_t usage_count; atomic_t child_count; unsigned int disable_depth:3; - unsigned int idle_notification:1; - unsigned int request_pending:1; - unsigned int deferred_resume:1; - unsigned int needs_force_resume:1; - unsigned int runtime_auto:1; + bool idle_notification:1; + bool request_pending:1; + bool deferred_resume:1; + bool needs_force_resume:1; + bool runtime_auto:1; bool ignore_children:1; - unsigned int no_callbacks:1; - unsigned int irq_safe:1; - unsigned int use_autosuspend:1; - unsigned int timer_autosuspends:1; - unsigned int memalloc_noio:1; + bool no_callbacks:1; + bool irq_safe:1; + bool use_autosuspend:1; + bool timer_autosuspends:1; + bool memalloc_noio:1; unsigned int links_count; enum rpm_request request; enum rpm_status runtime_status; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b97c5e9820f9..f24546a3d3db 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -20,6 +20,33 @@ #include <linux/time64.h> /* + * Flags to control the behaviour when attaching a device to its PM domains. + * + * PD_FLAG_NO_DEV_LINK: As the default behaviour creates a device-link + * for every PM domain that gets attached, this + * flag can be used to skip that. 
+ * + * PD_FLAG_DEV_LINK_ON: Add the DL_FLAG_RPM_ACTIVE to power-on the + * supplier and its PM domain when creating the + * device-links. + * + */ +#define PD_FLAG_NO_DEV_LINK BIT(0) +#define PD_FLAG_DEV_LINK_ON BIT(1) + +struct dev_pm_domain_attach_data { + const char * const *pd_names; + const u32 num_pd_names; + const u32 pd_flags; +}; + +struct dev_pm_domain_list { + struct device **pd_devs; + struct device_link **pd_links; + u32 num_pds; +}; + +/* * Flags to control the behaviour of a genpd. * * These flags may be set in the struct generic_pm_domain's flags field by a @@ -233,6 +260,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); int pm_genpd_remove(struct generic_pm_domain *genpd); +struct device *dev_to_genpd_dev(struct device *dev); int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_genpd_remove_notifier(struct device *dev); @@ -280,6 +308,11 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd) return -EOPNOTSUPP; } +static inline struct device *dev_to_genpd_dev(struct device *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) { @@ -322,7 +355,7 @@ static inline void dev_pm_genpd_resume(struct device *dev) {} /* OF PM domain providers */ struct of_device_id; -typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args, +typedef struct generic_pm_domain *(*genpd_xlate_t)(const struct of_phandle_args *args, void *data); struct genpd_onecell_data { @@ -337,11 +370,11 @@ int of_genpd_add_provider_simple(struct device_node *np, int of_genpd_add_provider_onecell(struct device_node *np, struct genpd_onecell_data *data); void of_genpd_del_provider(struct device_node *np); -int of_genpd_add_device(struct of_phandle_args *args, struct device *dev); -int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec); -int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec); +int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev); +int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec); +int of_genpd_remove_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec); struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); @@ -366,20 +399,20 @@ static inline int of_genpd_add_provider_onecell(struct device_node *np, static inline void of_genpd_del_provider(struct device_node *np) {} -static inline int of_genpd_add_device(struct of_phandle_args *args, +static inline int of_genpd_add_device(const struct of_phandle_args *args, struct device *dev) { return -ENODEV; } -static inline int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) +static inline int of_genpd_add_subdomain(const struct of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec) { return -ENODEV; } -static inline int of_genpd_remove_subdomain(struct of_phandle_args *parent_spec, - struct of_phandle_args *subdomain_spec) +static inline int of_genpd_remove_subdomain(const struct 
of_phandle_args *parent_spec, + const struct of_phandle_args *subdomain_spec) { return -ENODEV; } @@ -420,7 +453,11 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev, unsigned int index); struct device *dev_pm_domain_attach_by_name(struct device *dev, const char *name); +int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list); void dev_pm_domain_detach(struct device *dev, bool power_off); +void dev_pm_domain_detach_list(struct dev_pm_domain_list *list); int dev_pm_domain_start(struct device *dev); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); int dev_pm_domain_set_performance_state(struct device *dev, unsigned int state); @@ -439,7 +476,14 @@ static inline struct device *dev_pm_domain_attach_by_name(struct device *dev, { return NULL; } +static inline int dev_pm_domain_attach_list(struct device *dev, + const struct dev_pm_domain_attach_data *data, + struct dev_pm_domain_list **list) +{ + return 0; +} static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} +static inline void dev_pm_domain_detach_list(struct dev_pm_domain_list *list) {} static inline int dev_pm_domain_start(struct device *dev) { return 0; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 76dcb7f37bcd..dd7c8441af42 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -16,6 +16,7 @@ #include <linux/notifier.h> struct clk; +struct cpufreq_frequency_table; struct regulator; struct dev_pm_opp; struct device; @@ -87,12 +88,14 @@ struct dev_pm_opp_config { /** * struct dev_pm_opp_data - The data to use to initialize an OPP. + * @turbo: Flag to indicate whether the OPP is to be marked turbo or not. * @level: The performance level for the OPP. Set level to OPP_LEVEL_UNSET if * level field isn't used. * @freq: The clock rate in Hz for the OPP. * @u_volt: The voltage in uV for the OPP. 
*/ struct dev_pm_opp_data { + bool turbo; unsigned int level; unsigned long freq; unsigned long u_volt; @@ -444,6 +447,21 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev) #endif /* CONFIG_PM_OPP */ +#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP) +int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); +void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table); +#else +static inline int dev_pm_opp_init_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) +{ + return -EINVAL; +} + +static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, struct cpufreq_frequency_table **table) +{ +} +#endif + + #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); @@ -458,6 +476,8 @@ struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table); int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus); +int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW, + unsigned long *kHz); static inline void dev_pm_opp_of_unregister_em(struct device *dev) { em_dev_unregister_perf_domain(dev); @@ -521,6 +541,12 @@ static inline void dev_pm_opp_of_unregister_em(struct device *dev) { } +static inline int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW, + unsigned long *kHz) +{ + return -EOPNOTSUPP; +} + static inline int of_get_required_opp_performance_state(struct device_node *np, int index) { return -EOPNOTSUPP; diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7c9b35448563..d39dc863f612 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -72,7 +72,8 @@ extern int pm_runtime_force_resume(struct device *dev); extern int __pm_runtime_idle(struct device *dev, int rpmflags); extern int __pm_runtime_suspend(struct device *dev, int rpmflags); extern int __pm_runtime_resume(struct device *dev, int rpmflags); -extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count); +extern int pm_runtime_get_if_active(struct device *dev); +extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); @@ -95,18 +96,6 @@ extern void pm_runtime_release_supplier(struct device_link *link); extern int devm_pm_runtime_enable(struct device *dev); /** - * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. - * @dev: Target device. - * - * Increment the runtime PM usage counter of @dev if its runtime PM status is - * %RPM_ACTIVE and its runtime PM usage counter is greater than 0. - */ -static inline int pm_runtime_get_if_in_use(struct device *dev) -{ - return pm_runtime_get_if_active(dev, false); -} - -/** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. * @dev: Target device. * @enable: Whether or not to ignore possible dependencies on children. 
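/*
 * [Editor's sketch, not part of the patch] The pm_runtime.h hunk above
 * splits the old pm_runtime_get_if_active(dev, ign_usage_count) into two
 * exported functions: pm_runtime_get_if_active(dev), which bumps the
 * usage counter whenever the device is RPM_ACTIVE, and
 * pm_runtime_get_if_in_use(dev), which additionally requires the usage
 * counter to already be nonzero. A minimal caller sketch under the new
 * interface, assuming <linux/pm_runtime.h>; foo_hw_read() is made up:
 */
static int foo_read_counter(struct device *dev, u32 *val)
{
	int ret;

	ret = pm_runtime_get_if_active(dev);	/* no bool argument anymore */
	if (ret <= 0)
		return ret ? ret : -EAGAIN;	/* RPM disabled or suspended */

	*val = foo_hw_read(dev);		/* hypothetical MMIO access */

	pm_runtime_put(dev);
	return 0;
}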
@@ -275,8 +264,7 @@ static inline int pm_runtime_get_if_in_use(struct device *dev) { return -EINVAL; } -static inline int pm_runtime_get_if_active(struct device *dev, - bool ign_usage_count) +static inline int pm_runtime_get_if_active(struct device *dev) { return -EINVAL; } @@ -461,6 +449,18 @@ static inline int pm_runtime_put(struct device *dev) } /** + * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. + * @dev: Target device. + * + * Decrement the runtime PM usage counter of @dev and if it turns out to be + * equal to 0, queue up a work item for @dev like in pm_request_autosuspend(). + */ +static inline int __pm_runtime_put_autosuspend(struct device *dev) +{ + return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO); +} + +/** * pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. * diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 6eb9adaef52b..76cd1f9f1365 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -107,7 +107,7 @@ extern void wakeup_sources_read_unlock(int idx); extern struct wakeup_source *wakeup_sources_walk_start(void); extern struct wakeup_source *wakeup_sources_walk_next(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); -extern int device_wakeup_disable(struct device *dev); +extern void device_wakeup_disable(struct device *dev); extern void device_set_wakeup_capable(struct device *dev, bool capable); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); @@ -154,10 +154,9 @@ static inline int device_wakeup_enable(struct device *dev) return 0; } -static inline int device_wakeup_disable(struct device *dev) +static inline void device_wakeup_disable(struct device *dev) { dev->power.should_wakeup = false; - return 0; } static inline int device_set_wakeup_enable(struct device *dev, bool enable) @@ -235,11 +234,10 @@ static inline int device_init_wakeup(struct device *dev, bool enable) if (enable) { device_set_wakeup_capable(dev, true); return device_wakeup_enable(dev); - } else { - device_wakeup_disable(dev); - device_set_wakeup_capable(dev, false); - return 0; } + device_wakeup_disable(dev); + device_set_wakeup_capable(dev, false); + return 0; } #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index ddbe7c3ca4ce..82561242cda4 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -469,6 +469,8 @@ int compare_pnp_id(struct pnp_id *pos, const char *id); int pnp_register_driver(struct pnp_driver *drv); void pnp_unregister_driver(struct pnp_driver *drv); +#define dev_is_pnp(d) ((d)->bus == &pnp_bus_type) + #else /* device management */ @@ -500,6 +502,8 @@ static inline int compare_pnp_id(struct pnp_id *pos, const char *id) { return -E static inline int pnp_register_driver(struct pnp_driver *drv) { return -ENODEV; } static inline void pnp_unregister_driver(struct pnp_driver *drv) { } +#define dev_is_pnp(d) false + #endif /* CONFIG_PNP */ /** diff --git a/include/linux/poll.h b/include/linux/poll.h index a9e0e1c2d1f2..d1ea4f3714a8 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -14,11 +14,7 @@ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. 
*/ -#ifdef __clang__ -#define MAX_STACK_ALLOC 768 -#else #define MAX_STACK_ALLOC 832 -#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 7d8025fb74b7..5180dc9f1706 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -47,21 +47,12 @@ struct bq27xxx_access_methods { }; struct bq27xxx_reg_cache { - int temperature; - int time_to_empty; - int time_to_empty_avg; - int time_to_full; - int charge_full; - int cycle_count; int capacity; - int energy; int flags; - int health; }; struct bq27xxx_device_info { struct device *dev; - int id; enum bq27xxx_chip chip; u32 opts; const char *name; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c0992a77feea..8e5705a56b85 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -242,6 +242,7 @@ struct power_supply_config { struct power_supply_desc { const char *name; enum power_supply_type type; + u8 charge_behaviours; const enum power_supply_usb_type *usb_types; size_t num_usb_types; const enum power_supply_property *properties; @@ -894,8 +895,7 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev); #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -/* For APM emulation, think legacy userspace. */ -extern struct class *power_supply_class; +extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) { diff --git a/include/linux/printk.h b/include/linux/printk.h index 8ef499ab3c1e..40afab23881a 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -60,6 +60,9 @@ static inline const char *printk_skip_headers(const char *buffer) #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET +int add_preferred_console_match(const char *match, const char *name, + const short idx); + extern int console_printk[]; #define console_loglevel (console_printk[0]) @@ -71,7 +74,7 @@ extern void console_verbose(void); /* strlen("ratelimit") + 1 */ #define DEVKMSG_STR_MAX_SIZE 10 -extern char devkmsg_log_str[]; +extern char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE]; struct ctl_table; extern int suppress_printk; @@ -126,7 +129,7 @@ struct va_format { #define no_printk(fmt, ...) 
\ ({ \ if (0) \ - printk(fmt, ##__VA_ARGS__); \ + _printk(fmt, ##__VA_ARGS__); \ 0; \ }) @@ -192,6 +195,7 @@ void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); +void console_replay_all(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -271,8 +275,13 @@ static inline void dump_stack(void) static inline void printk_trigger_flush(void) { } +static inline void console_replay_all(void) +{ +} #endif +bool this_cpu_in_panic(void); + #ifdef CONFIG_SMP extern int __printk_cpu_sync_try_get(void); extern void __printk_cpu_sync_wait(void); diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 49539bc416ce..5ea470eb4d76 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -66,7 +66,7 @@ static inline void proc_free_inum(unsigned int inum) {} static inline int ns_alloc_inum(struct ns_common *ns) { - atomic_long_set(&ns->stashed, 0); + WRITE_ONCE(ns->stashed, NULL); return proc_alloc_inum(&ns->inum); } diff --git a/include/linux/profile.h b/include/linux/profile.h index 11db1ec516e2..04ae5ebcb637 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -18,13 +18,8 @@ struct proc_dir_entry; struct notifier_block; #if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS) -void create_prof_cpu_mask(void); int create_proc_profile(void); #else -static inline void create_prof_cpu_mask(void) -{ -} - static inline int create_proc_profile(void) { return 0; diff --git a/include/linux/property.h b/include/linux/property.h index e6516d0b7d52..61fc20e5f81f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -11,7 +11,9 @@ #define _LINUX_PROPERTY_H_ #include <linux/args.h> +#include <linux/array_size.h> #include <linux/bits.h> +#include <linux/cleanup.h> #include <linux/fwnode.h> #include <linux/stddef.h> #include <linux/types.h> @@ -27,12 +29,6 @@ enum dev_prop_type { DEV_PROP_REF, }; -enum dev_dma_attr { - DEV_DMA_NOT_SUPPORTED, - DEV_DMA_NON_COHERENT, - DEV_DMA_COHERENT, -}; - const struct fwnode_handle *__dev_fwnode_const(const struct device *dev); struct fwnode_handle *__dev_fwnode(struct device *dev); #define dev_fwnode(dev) \ @@ -156,11 +152,9 @@ struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); for (parent = fwnode_get_parent(fwnode); parent; \ parent = fwnode_get_next_parent(parent)) -struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode); unsigned int fwnode_count_parents(const struct fwnode_handle *fwn); struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwn, unsigned int depth); -bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, const struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_child_node( const struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *fwnode_get_next_available_child_node( @@ -181,13 +175,32 @@ struct fwnode_handle *device_get_next_child_node(const struct device *dev, for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) +#define device_for_each_child_node_scoped(dev, child) \ + for (struct fwnode_handle *child __free(fwnode_handle) = \ + device_get_next_child_node(dev, NULL); \ + child; child = device_get_next_child_node(dev, child)) + struct fwnode_handle *fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname); struct fwnode_handle 
*device_get_named_child_node(const struct device *dev, const char *childname); struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode); -void fwnode_handle_put(struct fwnode_handle *fwnode); + +/** + * fwnode_handle_put - Drop reference to a device node + * @fwnode: Pointer to the device node to drop the reference to. + * + * This has to be used when terminating device_for_each_child_node() iteration + * with break or return to prevent stale device node references from being left + * behind. + */ +static inline void fwnode_handle_put(struct fwnode_handle *fwnode) +{ + fwnode_call_void_op(fwnode, put); +} + +DEFINE_FREE(fwnode_handle, struct fwnode_handle *, fwnode_handle_put(_T)) int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index fb724c65c77b..6d07c95dabb9 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -15,11 +15,14 @@ struct pse_controller_dev; /** * struct pse_control_config - PSE control/channel configuration. * - * @admin_cotrol: set PoDL PSE admin control as described in + * @podl_admin_control: set PoDL PSE admin control as described in * IEEE 802.3-2018 30.15.1.2.1 acPoDLPSEAdminControl + * @c33_admin_control: set PSE admin control as described in + * IEEE 802.3-2022 30.9.1.2.1 acPSEAdminControl */ struct pse_control_config { - enum ethtool_podl_pse_admin_state admin_cotrol; + enum ethtool_podl_pse_admin_state podl_admin_control; + enum ethtool_c33_pse_admin_state c33_admin_control; }; /** @@ -29,25 +32,36 @@ struct pse_control_config { * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState * @podl_pw_status: power detection status of the PoDL PSE. * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @c33_admin_state: operational state of the PSE + * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState + * @c33_pw_status: power detection status of the PSE. + * IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; + enum ethtool_c33_pse_admin_state c33_admin_state; + enum ethtool_c33_pse_pw_d_status c33_pw_status; }; /** * struct pse_controller_ops - PSE controller driver callbacks * * @ethtool_get_status: get PSE control status for ethtool interface - * @ethtool_set_config: set PSE control configuration over ethtool interface + * @setup_pi_matrix: setup PI matrix of the PSE controller + * @pi_is_enabled: Return 1 if the PSE PI is enabled, 0 if not. + * May also return negative errno. + * @pi_enable: Configure the PSE PI as enabled. + * @pi_disable: Configure the PSE PI as disabled. 
*/ struct pse_controller_ops { int (*ethtool_get_status)(struct pse_controller_dev *pcdev, unsigned long id, struct netlink_ext_ack *extack, struct pse_control_status *status); - int (*ethtool_set_config)(struct pse_controller_dev *pcdev, - unsigned long id, struct netlink_ext_ack *extack, - const struct pse_control_config *config); + int (*setup_pi_matrix)(struct pse_controller_dev *pcdev); + int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id); + int (*pi_enable)(struct pse_controller_dev *pcdev, int id); + int (*pi_disable)(struct pse_controller_dev *pcdev, int id); }; struct module; @@ -55,6 +69,40 @@ struct device_node; struct of_phandle_args; struct pse_control; +/* PSE PI pairset pinout can either be Alternative A or Alternative B */ +enum pse_pi_pairset_pinout { + ALTERNATIVE_A, + ALTERNATIVE_B, +}; + +/** + * struct pse_pi_pairset - PSE PI pairset entity describing the pinout + * alternative and its phandle + * + * @pinout: description of the pinout alternative + * @np: device node pointer describing the pairset phandle + */ +struct pse_pi_pairset { + enum pse_pi_pairset_pinout pinout; + struct device_node *np; +}; + +/** + * struct pse_pi - PSE PI (Power Interface) entity as described in + * IEEE 802.3-2022 145.2.4 + * + * @pairset: table of the PSE PI pinout alternative for the two pairset + * @np: device node pointer of the PSE PI node + * @rdev: regulator represented by the PSE PI + * @admin_state_enabled: PI enabled state + */ +struct pse_pi { + struct pse_pi_pairset pairset[2]; + struct device_node *np; + struct regulator_dev *rdev; + bool admin_state_enabled; +}; + /** * struct pse_controller_dev - PSE controller entity that might * provide multiple PSE controls @@ -64,10 +112,11 @@ struct pse_control; * @pse_control_head: head of internal list of requested PSE controls * @dev: corresponding driver model device struct * @of_pse_n_cells: number of cells in PSE line specifiers - * @of_xlate: translation function to translate from specifier as found in the - * device tree to id as given to the PSE control ops * @nr_lines: number of PSE controls in this controller device * @lock: Mutex for serialization access to the PSE controller + * @types: types of the PSE controller + * @pi: table of PSE PIs described in this controller device + * @no_of_pse_pi: flag set if the pse_pis devicetree node is not used */ struct pse_controller_dev { const struct pse_controller_ops *ops; @@ -76,10 +125,11 @@ struct pse_controller_dev { struct list_head pse_control_head; struct device *dev; int of_pse_n_cells; - int (*of_xlate)(struct pse_controller_dev *pcdev, - const struct of_phandle_args *pse_spec); unsigned int nr_lines; struct mutex lock; + enum ethtool_pse_types types; + struct pse_pi *pi; + bool no_of_pse_pi; }; #if IS_ENABLED(CONFIG_PSE_CONTROLLER) @@ -99,6 +149,9 @@ int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config); +bool pse_has_podl(struct pse_control *psec); +bool pse_has_c33(struct pse_control *psec); + #else static inline struct pse_control *of_pse_control_get(struct device_node *node) @@ -124,6 +177,16 @@ static inline int pse_ethtool_set_config(struct pse_control *psec, return -ENOTSUPP; } +static inline bool pse_has_podl(struct pse_control *psec) +{ + return false; +} + +static inline bool pse_has_c33(struct pse_control *psec) +{ + return false; +} + #endif #endif diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 7fd17e82bab4..3705c2044fc0 100644 --- a/include/linux/psp-sev.h 
+++ b/include/linux/psp-sev.h @@ -78,6 +78,36 @@ enum sev_cmd { SEV_CMD_DBG_DECRYPT = 0x060, SEV_CMD_DBG_ENCRYPT = 0x061, + /* SNP specific commands */ + SEV_CMD_SNP_INIT = 0x081, + SEV_CMD_SNP_SHUTDOWN = 0x082, + SEV_CMD_SNP_PLATFORM_STATUS = 0x083, + SEV_CMD_SNP_DF_FLUSH = 0x084, + SEV_CMD_SNP_INIT_EX = 0x085, + SEV_CMD_SNP_SHUTDOWN_EX = 0x086, + SEV_CMD_SNP_DECOMMISSION = 0x090, + SEV_CMD_SNP_ACTIVATE = 0x091, + SEV_CMD_SNP_GUEST_STATUS = 0x092, + SEV_CMD_SNP_GCTX_CREATE = 0x093, + SEV_CMD_SNP_GUEST_REQUEST = 0x094, + SEV_CMD_SNP_ACTIVATE_EX = 0x095, + SEV_CMD_SNP_LAUNCH_START = 0x0A0, + SEV_CMD_SNP_LAUNCH_UPDATE = 0x0A1, + SEV_CMD_SNP_LAUNCH_FINISH = 0x0A2, + SEV_CMD_SNP_DBG_DECRYPT = 0x0B0, + SEV_CMD_SNP_DBG_ENCRYPT = 0x0B1, + SEV_CMD_SNP_PAGE_SWAP_OUT = 0x0C0, + SEV_CMD_SNP_PAGE_SWAP_IN = 0x0C1, + SEV_CMD_SNP_PAGE_MOVE = 0x0C2, + SEV_CMD_SNP_PAGE_MD_INIT = 0x0C3, + SEV_CMD_SNP_PAGE_SET_STATE = 0x0C6, + SEV_CMD_SNP_PAGE_RECLAIM = 0x0C7, + SEV_CMD_SNP_PAGE_UNSMASH = 0x0C8, + SEV_CMD_SNP_CONFIG = 0x0C9, + SEV_CMD_SNP_DOWNLOAD_FIRMWARE_EX = 0x0CA, + SEV_CMD_SNP_COMMIT = 0x0CB, + SEV_CMD_SNP_VLEK_LOAD = 0x0CD, + SEV_CMD_MAX, }; @@ -523,12 +553,269 @@ struct sev_data_attestation_report { u32 len; /* In/Out */ } __packed; +/** + * struct sev_data_snp_download_firmware - SNP_DOWNLOAD_FIRMWARE command params + * + * @address: physical address of firmware image + * @len: length of the firmware image + */ +struct sev_data_snp_download_firmware { + u64 address; /* In */ + u32 len; /* In */ +} __packed; + +/** + * struct sev_data_snp_activate - SNP_ACTIVATE command params + * + * @gctx_paddr: system physical address guest context page + * @asid: ASID to bind to the guest + */ +struct sev_data_snp_activate { + u64 gctx_paddr; /* In */ + u32 asid; /* In */ +} __packed; + +/** + * struct sev_data_snp_addr - generic SNP command params + * + * @address: physical address of generic data param + */ +struct sev_data_snp_addr { + u64 address; /* In/Out */ +} __packed; + +/** + * struct sev_data_snp_launch_start - SNP_LAUNCH_START command params + * + * @gctx_paddr: system physical address of guest context page + * @policy: guest policy + * @ma_gctx_paddr: system physical address of migration agent + * @ma_en: the guest is associated with a migration agent + * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the + * purpose of guest-assisted migration. 
+ * @rsvd: reserved + * @gosvw: guest OS-visible workarounds, as defined by hypervisor + */ +struct sev_data_snp_launch_start { + u64 gctx_paddr; /* In */ + u64 policy; /* In */ + u64 ma_gctx_paddr; /* In */ + u32 ma_en:1; /* In */ + u32 imi_en:1; /* In */ + u32 rsvd:30; + u8 gosvw[16]; /* In */ +} __packed; + +/* SNP support page type */ +enum { + SNP_PAGE_TYPE_NORMAL = 0x1, + SNP_PAGE_TYPE_VMSA = 0x2, + SNP_PAGE_TYPE_ZERO = 0x3, + SNP_PAGE_TYPE_UNMEASURED = 0x4, + SNP_PAGE_TYPE_SECRET = 0x5, + SNP_PAGE_TYPE_CPUID = 0x6, + + SNP_PAGE_TYPE_MAX +}; + +/** + * struct sev_data_snp_launch_update - SNP_LAUNCH_UPDATE command params + * + * @gctx_paddr: system physical address of guest context page + * @page_size: page size 0 indicates 4K and 1 indicates 2MB page + * @page_type: encoded page type + * @imi_page: indicates that this page is part of the IMI (Incoming Migration + * Image) of the guest + * @rsvd: reserved + * @rsvd2: reserved + * @address: system physical address of destination page to encrypt + * @rsvd3: reserved + * @vmpl1_perms: VMPL permission mask for VMPL1 + * @vmpl2_perms: VMPL permission mask for VMPL2 + * @vmpl3_perms: VMPL permission mask for VMPL3 + * @rsvd4: reserved + */ +struct sev_data_snp_launch_update { + u64 gctx_paddr; /* In */ + u32 page_size:1; /* In */ + u32 page_type:3; /* In */ + u32 imi_page:1; /* In */ + u32 rsvd:27; + u32 rsvd2; + u64 address; /* In */ + u32 rsvd3:8; + u32 vmpl1_perms:8; /* In */ + u32 vmpl2_perms:8; /* In */ + u32 vmpl3_perms:8; /* In */ + u32 rsvd4; +} __packed; + +/** + * struct sev_data_snp_launch_finish - SNP_LAUNCH_FINISH command params + * + * @gctx_paddr: system physical address of guest context page + * @id_block_paddr: system physical address of ID block + * @id_auth_paddr: system physical address of ID block authentication structure + * @id_block_en: indicates whether ID block is present + * @auth_key_en: indicates whether author key is present in authentication structure + * @rsvd: reserved + * @host_data: host-supplied data for guest, not interpreted by firmware + */ +struct sev_data_snp_launch_finish { + u64 gctx_paddr; + u64 id_block_paddr; + u64 id_auth_paddr; + u8 id_block_en:1; + u8 auth_key_en:1; + u64 rsvd:62; + u8 host_data[32]; +} __packed; + +/** + * struct sev_data_snp_guest_status - SNP_GUEST_STATUS command params + * + * @gctx_paddr: system physical address of guest context page + * @address: system physical address of guest status page + */ +struct sev_data_snp_guest_status { + u64 gctx_paddr; + u64 address; +} __packed; + +/** + * struct sev_data_snp_page_reclaim - SNP_PAGE_RECLAIM command params + * + * @paddr: system physical address of page to be claimed. The 0th bit in the + * address indicates the page size. 0h indicates 4KB and 1h indicates + * 2MB page. + */ +struct sev_data_snp_page_reclaim { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_page_unsmash - SNP_PAGE_UNSMASH command params + * + * @paddr: system physical address of page to be unsmashed. The 0th bit in the + * address indicates the page size. 0h indicates 4 KB and 1h indicates + * 2 MB page. 
+ */ +struct sev_data_snp_page_unsmash { + u64 paddr; +} __packed; + +/** + * struct sev_data_snp_dbg - DBG_ENCRYPT/DBG_DECRYPT command parameters + * + * @gctx_paddr: system physical address of guest context page + * @src_addr: source address of data to operate on + * @dst_addr: destination address of data to operate on + */ +struct sev_data_snp_dbg { + u64 gctx_paddr; /* In */ + u64 src_addr; /* In */ + u64 dst_addr; /* In */ +} __packed; + +/** + * struct sev_data_snp_guest_request - SNP_GUEST_REQUEST command params + * + * @gctx_paddr: system physical address of guest context page + * @req_paddr: system physical address of request page + * @res_paddr: system physical address of response page + */ +struct sev_data_snp_guest_request { + u64 gctx_paddr; /* In */ + u64 req_paddr; /* In */ + u64 res_paddr; /* In */ +} __packed; + +/** + * struct sev_data_snp_init_ex - SNP_INIT_EX structure + * + * @init_rmp: indicate that the RMP should be initialized. + * @list_paddr_en: indicate that list_paddr is valid + * @rsvd: reserved + * @rsvd1: reserved + * @list_paddr: system physical address of range list + * @rsvd2: reserved + */ +struct sev_data_snp_init_ex { + u32 init_rmp:1; + u32 list_paddr_en:1; + u32 rsvd:30; + u32 rsvd1; + u64 list_paddr; + u8 rsvd2[48]; +} __packed; + +/** + * struct sev_data_range - RANGE structure + * + * @base: system physical address of first byte of range + * @page_count: number of 4KB pages in this range + * @rsvd: reserved + */ +struct sev_data_range { + u64 base; + u32 page_count; + u32 rsvd; +} __packed; + +/** + * struct sev_data_range_list - RANGE_LIST structure + * + * @num_elements: number of elements in RANGE_ARRAY + * @rsvd: reserved + * @ranges: array of num_elements of type RANGE + */ +struct sev_data_range_list { + u32 num_elements; + u32 rsvd; + struct sev_data_range ranges[]; +} __packed; + +/** + * struct sev_data_snp_shutdown_ex - SNP_SHUTDOWN_EX structure + * + * @len: length of the command buffer read by the PSP + * @iommu_snp_shutdown: Disable enforcement of SNP in the IOMMU + * @rsvd1: reserved + */ +struct sev_data_snp_shutdown_ex { + u32 len; + u32 iommu_snp_shutdown:1; + u32 rsvd1:31; +} __packed; + +/** + * struct sev_platform_init_args + * + * @error: SEV firmware error code + * @probe: True if this is being called as part of CCP module probe, which + * will defer SEV_INIT/SEV_INIT_EX firmware initialization until needed + * unless psp_init_on_probe module param is set + */ +struct sev_platform_init_args { + int error; + bool probe; +}; + +/** + * struct sev_data_snp_commit - SNP_COMMIT structure + * + * @len: length of the command buffer read by the PSP + */ +struct sev_data_snp_commit { + u32 len; +} __packed; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** * sev_platform_init - perform SEV INIT command * - * @error: SEV command return code + * @args: struct sev_platform_init_args to pass in arguments * * Returns: * 0 if the SEV successfully processed the command @@ -537,7 +824,7 @@ struct sev_data_attestation_report { * -%ETIMEDOUT if the SEV command timed out * -%EIO if the SEV returned a non-zero return code */ -int sev_platform_init(int *error); +int sev_platform_init(struct sev_platform_init_args *args); /** * sev_platform_status - perform SEV PLATFORM_STATUS command @@ -637,14 +924,32 @@ int sev_guest_df_flush(int *error); */ int sev_guest_decommission(struct sev_data_decommission *data, int *error); +/** + * sev_do_cmd - issue an SEV or an SEV-SNP command + * + * @cmd: SEV or SEV-SNP firmware command to issue + * @data: arguments for 
firmware command + * @psp_ret: SEV command return code + * + * Returns: + * 0 if the SEV device successfully processed the command + * -%ENODEV if the PSP device is not available + * -%ENOTSUPP if PSP device does not support SEV + * -%ETIMEDOUT if the SEV command timed out + * -%EIO if PSP device returned a non-zero return code + */ +int sev_do_cmd(int cmd, void *data, int *psp_ret); + void *psp_copy_user_blob(u64 uaddr, u32 len); +void *snp_alloc_firmware_page(gfp_t mask); +void snp_free_firmware_page(void *addr); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ static inline int sev_platform_status(struct sev_user_data_status *status, int *error) { return -ENODEV; } -static inline int sev_platform_init(int *error) { return -ENODEV; } +static inline int sev_platform_init(struct sev_platform_init_args *args) { return -ENODEV; } static inline int sev_guest_deactivate(struct sev_data_deactivate *data, int *error) { return -ENODEV; } @@ -653,6 +958,9 @@ static inline int sev_guest_decommission(struct sev_data_decommission *data, int *error) { return -ENODEV; } static inline int +sev_do_cmd(int cmd, void *data, int *psp_ret) { return -ENODEV; } + +static inline int sev_guest_activate(struct sev_data_activate *data, int *error) { return -ENODEV; } static inline int sev_guest_df_flush(int *error) { return -ENODEV; } @@ -662,6 +970,13 @@ sev_issue_cmd_external_user(struct file *filep, unsigned int id, void *data, int static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_PTR(-EINVAL); } +static inline void *snp_alloc_firmware_page(gfp_t mask) +{ + return NULL; +} + +static inline void snp_free_firmware_page(void *addr) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 2a3a95586425..8dbd51ea8626 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -18,6 +18,16 @@ struct ptdump_state { const struct ptdump_range *range; }; +bool ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, + bool checkwx, bool dmesg); void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd); +bool ptdump_check_wx(void); + +static inline void debug_checkwx(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_WX)) + ptdump_check_wx(); +} #endif /* _LINUX_PTDUMP_H */ diff --git a/include/linux/pti.h b/include/linux/pti.h index 1a941efcaa62..1fbf9d6c20ef 100644 --- a/include/linux/pti.h +++ b/include/linux/pti.h @@ -2,7 +2,7 @@ #ifndef _INCLUDE_PTI_H #define _INCLUDE_PTI_H -#ifdef CONFIG_PAGE_TABLE_ISOLATION +#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION #include <asm/pti.h> #else static inline void pti_init(void) { } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 1ef4e0f9bd2a..6e4b8206c7d0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -200,6 +200,7 @@ struct ptp_clock; enum ptp_clock_events { PTP_CLOCK_ALARM, PTP_CLOCK_EXTTS, + PTP_CLOCK_EXTOFF, PTP_CLOCK_PPS, PTP_CLOCK_PPSUSR, }; @@ -210,6 +211,7 @@ enum ptp_clock_events { * @type: One of the ptp_clock_events enumeration values. * @index: Identifies the source of the event. * @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only). + * @offset: When the event occurred (%PTP_CLOCK_EXTOFF only). * @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only). 
*/ @@ -218,6 +220,7 @@ struct ptp_clock_event { int index; union { u64 timestamp; + s64 offset; struct pps_event_time pps_times; }; }; diff --git a/include/linux/ptp_kvm.h b/include/linux/ptp_kvm.h index 746fd67c3480..e8c74fa3f455 100644 --- a/include/linux/ptp_kvm.h +++ b/include/linux/ptp_kvm.h @@ -8,15 +8,15 @@ #ifndef _PTP_KVM_H_ #define _PTP_KVM_H_ +#include <linux/clocksource_ids.h> #include <linux/types.h> struct timespec64; -struct clocksource; int kvm_arch_ptp_init(void); void kvm_arch_ptp_exit(void); int kvm_arch_ptp_get_clock(struct timespec64 *ts); int kvm_arch_ptp_get_crosststamp(u64 *cycle, - struct timespec64 *tspec, struct clocksource **cs); + struct timespec64 *tspec, enum clocksource_ids *cs_id); #endif /* _PTP_KVM_H_ */ diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 808f9d3ee546..fd037c127bb0 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -464,11 +464,11 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See * documentation for vmalloc for which of them are legal. */ -static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) +static inline void **__ptr_ring_init_queue_alloc_noprof(unsigned int size, gfp_t gfp) { if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; - return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); + return kvmalloc_array_noprof(size, sizeof(void *), gfp | __GFP_ZERO); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -484,9 +484,9 @@ static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) r->batch = 1; } -static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +static inline int ptr_ring_init_noprof(struct ptr_ring *r, int size, gfp_t gfp) { - r->queue = __ptr_ring_init_queue_alloc(size, gfp); + r->queue = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!r->queue) return -ENOMEM; @@ -497,6 +497,7 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } +#define ptr_ring_init(...) alloc_hooks(ptr_ring_init_noprof(__VA_ARGS__)) /* * Return entries into ring. Destroy entries that don't fit. @@ -587,11 +588,11 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, +static inline int ptr_ring_resize_noprof(struct ptr_ring *r, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; - void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **queue = __ptr_ring_init_queue_alloc_noprof(size, gfp); void **old; if (!queue) @@ -609,6 +610,7 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, return 0; } +#define ptr_ring_resize(...) alloc_hooks(ptr_ring_resize_noprof(__VA_ARGS__)) /* * Note: producer lock is nested within consumer lock, so if you @@ -616,21 +618,21 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. 
*/ -static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, - unsigned int nrings, - int size, - gfp_t gfp, void (*destroy)(void *)) +static inline int ptr_ring_resize_multiple_noprof(struct ptr_ring **rings, + unsigned int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void ***queues; int i; - queues = kmalloc_array(nrings, sizeof(*queues), gfp); + queues = kmalloc_array_noprof(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; for (i = 0; i < nrings; ++i) { - queues[i] = __ptr_ring_init_queue_alloc(size, gfp); + queues[i] = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!queues[i]) goto nomem; } @@ -660,6 +662,8 @@ nomem: noqueues: return -ENOMEM; } +#define ptr_ring_resize_multiple(...) \ + alloc_hooks(ptr_ring_resize_multiple_noprof(__VA_ARGS__)) static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { diff --git a/include/linux/pwm.h b/include/linux/pwm.h index fcc2c4496f73..60b92c2c75ef 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -2,6 +2,7 @@ #ifndef __LINUX_PWM_H #define __LINUX_PWM_H +#include <linux/device.h> #include <linux/err.h> #include <linux/mutex.h> #include <linux/of.h> @@ -271,12 +272,12 @@ struct pwm_ops { * @id: unique number of this PWM chip * @npwm: number of PWMs controlled by this chip * @of_xlate: request a PWM device given a device tree PWM specifier - * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier * @atomic: can the driver's ->apply() be called in atomic context + * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip * @pwms: array of PWM devices allocated by the framework */ struct pwm_chip { - struct device *dev; + struct device dev; const struct pwm_ops *ops; struct module *owner; unsigned int id; @@ -284,13 +285,28 @@ struct pwm_chip { struct pwm_device * (*of_xlate)(struct pwm_chip *chip, const struct of_phandle_args *args); - unsigned int of_pwm_n_cells; bool atomic; /* only used internally by the PWM framework */ - struct pwm_device *pwms; + bool uses_pwmchip_alloc; + struct pwm_device pwms[] __counted_by(npwm); }; +static inline struct device *pwmchip_parent(const struct pwm_chip *chip) +{ + return chip->dev.parent; +} + +static inline void *pwmchip_get_drvdata(struct pwm_chip *chip) +{ + return dev_get_drvdata(&chip->dev); +} + +static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) +{ + dev_set_drvdata(&chip->dev, data); +} + #if IS_ENABLED(CONFIG_PWM) /* PWM user APIs */ int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state); @@ -380,6 +396,10 @@ static inline bool pwm_might_sleep(struct pwm_device *pwm) int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result, unsigned long timeout); +void pwmchip_put(struct pwm_chip *chip); +struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); +struct pwm_chip *devm_pwmchip_alloc(struct device *parent, unsigned int npwm, size_t sizeof_priv); + int __pwmchip_add(struct pwm_chip *chip, struct module *owner); #define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE) void pwmchip_remove(struct pwm_chip *chip); @@ -452,6 +472,24 @@ static inline int pwm_capture(struct pwm_device *pwm, return -EINVAL; } +static inline void pwmchip_put(struct pwm_chip *chip) +{ +} + +static inline struct pwm_chip *pwmchip_alloc(struct device *parent, + unsigned int npwm, + size_t sizeof_priv) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct pwm_chip *devm_pwmchip_alloc(struct device 
*parent, + unsigned int npwm, + size_t sizeof_priv) +{ + return pwmchip_alloc(parent, npwm, sizeof_priv); +} + static inline int pwmchip_add(struct pwm_chip *chip) { return -EINVAL; @@ -583,17 +621,4 @@ static inline void pwm_remove_table(struct pwm_lookup *table, size_t num) } #endif -#ifdef CONFIG_PWM_SYSFS -void pwmchip_sysfs_export(struct pwm_chip *chip); -void pwmchip_sysfs_unexport(struct pwm_chip *chip); -#else -static inline void pwmchip_sysfs_export(struct pwm_chip *chip) -{ -} - -static inline void pwmchip_sysfs_unexport(struct pwm_chip *chip) -{ -} -#endif /* CONFIG_PWM_SYSFS */ - #endif /* __LINUX_PWM_H */ diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index cdd2ac366bc7..0bf80e98d5b4 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -19,7 +19,6 @@ struct platform_pwm_backlight_data { int (*notify)(struct device *dev, int brightness); void (*notify_after)(struct device *dev, int brightness); void (*exit)(struct device *dev); - int (*check_fb)(struct device *dev, struct fb_info *info); }; #endif diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index cd1973e6ac4b..844a2743ca94 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -217,9 +217,9 @@ enum pxa_ssp_type { PXA27x_SSP, PXA3xx_SSP, PXA168_SSP, - MMP2_SSP, PXA910_SSP, CE4100_SSP, + MMP2_SSP, MRFLD_SSP, QUARK_X1000_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ diff --git a/include/linux/qat/qat_mig_dev.h b/include/linux/qat/qat_mig_dev.h new file mode 100644 index 000000000000..dbbb6a063dd2 --- /dev/null +++ b/include/linux/qat/qat_mig_dev.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef QAT_MIG_DEV_H_ +#define QAT_MIG_DEV_H_ + +struct pci_dev; + +struct qat_mig_dev { + void *parent_accel_dev; + u8 *state; + u32 setup_size; + u32 remote_setup_size; + u32 state_size; + s32 vf_id; +}; + +struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id); +int qat_vfmig_init(struct qat_mig_dev *mdev); +void qat_vfmig_cleanup(struct qat_mig_dev *mdev); +void qat_vfmig_reset(struct qat_mig_dev *mdev); +int qat_vfmig_open(struct qat_mig_dev *mdev); +void qat_vfmig_close(struct qat_mig_dev *mdev); +int qat_vfmig_suspend(struct qat_mig_dev *mdev); +int qat_vfmig_resume(struct qat_mig_dev *mdev); +int qat_vfmig_save_state(struct qat_mig_dev *mdev); +int qat_vfmig_save_setup(struct qat_mig_dev *mdev); +int qat_vfmig_load_state(struct qat_mig_dev *mdev); +int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size); +void qat_vfmig_destroy(struct qat_mig_dev *mdev); + +#endif /*QAT_MIG_DEV_H_*/ diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 5d868505a94e..6d92b68efbf6 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset); if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \ &randomize_kstack_offset)) { \ u32 offset = raw_cpu_read(kstack_offset); \ - offset ^= (rand); \ + offset = ror32(offset, 5) ^ (rand); \ raw_cpu_write(kstack_offset, offset); \ } \ } while (0) diff --git a/include/linux/ras.h b/include/linux/ras.h index 1f4048bf2674..a64182bc72ad 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -25,6 +25,7 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); + 
#else static inline void log_non_standard_event(const guid_t *sec_type, @@ -35,4 +36,21 @@ static inline void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif +struct atl_err { + u64 addr; + u64 ipid; + u32 cpu; +}; + +#if IS_ENABLED(CONFIG_AMD_ATL) +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); +void amd_atl_unregister_decoder(void); +void amd_retire_dram_row(struct atl_err *err); +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +#else +static inline void amd_retire_dram_row(struct atl_err *err) { } +static inline unsigned long +amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } +#endif /* CONFIG_AMD_ATL */ + #endif /* __RAS_H__ */ diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 0027d4c8087c..3860dbb9107a 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) } extern void rcu_sync_init(struct rcu_sync *); -extern void rcu_sync_enter_start(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0746b1b0b663..dfd2399f2cde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); do { \ int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting); \ \ - if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) && \ + if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) && \ likely(!___rttq_nesting)) { \ - rcu_trc_cmpxchg_need_qs((t), 0, TRC_NEED_QS_CHECKED); \ + rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED); \ } else if (___rttq_nesting && ___rttq_nesting != INT_MIN && \ !READ_ONCE((t)->trc_reader_special.b.blocked)) { \ rcu_tasks_trace_qs_blkd(t); \ @@ -247,6 +247,37 @@ do { \ cond_resched(); \ } while (0) +/** + * rcu_softirq_qs_periodic - Report RCU and RCU-Tasks quiescent states + * @old_ts: jiffies at start of processing. + * + * This helper is for long-running softirq handlers, such as NAPI threads in + * networking. The caller should initialize the variable passed in as @old_ts + * at the beginning of the softirq handler. When invoked frequently, this macro + * will invoke rcu_softirq_qs() every 100 milliseconds thereafter, which will + * provide both RCU and RCU-Tasks quiescent states. Note that this macro + * modifies its old_ts argument. + * + * Because regions of code that have disabled softirq act as RCU read-side + * critical sections, this macro should be invoked with softirq (and + * preemption) enabled. + * + * The macro is not needed when CONFIG_PREEMPT_RT is defined. RT kernels would + * have more chance to invoke schedule() calls and provide necessary quiescent + * states. As a contrast, calling cond_resched() only won't achieve the same + * effect because cond_resched() does not provide RCU-Tasks quiescent states. + */ +#define rcu_softirq_qs_periodic(old_ts) \ +do { \ + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && \ + time_after(jiffies, (old_ts) + HZ / 10)) { \ + preempt_disable(); \ + rcu_softirq_qs(); \ + preempt_enable(); \ + (old_ts) = jiffies; \ + } \ +} while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. 
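/*
 * [Editor's sketch, not part of the patch] Usage of the
 * rcu_softirq_qs_periodic() helper documented in the rcupdate.h hunk
 * above, in the style of a long-running NAPI poll thread. foo_priv and
 * foo_napi_poll() are hypothetical; the jiffies timestamp follows the
 * @old_ts contract (initialized at the start, then updated by the
 * macro roughly every 100ms).
 */
static int foo_poll_thread(void *data)
{
	struct foo_priv *priv = data;		/* hypothetical */
	unsigned long last_qs = jiffies;

	while (!kthread_should_stop()) {
		foo_napi_poll(priv);		/* hypothetical unit of work */
		/* Report RCU and RCU-Tasks quiescent states periodically. */
		rcu_softirq_qs_periodic(last_qs);
	}
	return 0;
}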
@@ -370,15 +401,15 @@ static inline int debug_lockdep_rcu_enabled(void) } \ } while (0) -#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) +#ifndef CONFIG_PREEMPT_RCU static inline void rcu_preempt_sleep_check(void) { RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), "Illegal context switch in RCU read-side critical section"); } -#else /* #ifdef CONFIG_PROVE_RCU */ +#else // #ifndef CONFIG_PREEMPT_RCU static inline void rcu_preempt_sleep_check(void) { } -#endif /* #else #ifdef CONFIG_PROVE_RCU */ +#endif // #else // #ifndef CONFIG_PREEMPT_RCU #define rcu_sleep_check() \ do { \ @@ -778,9 +809,9 @@ static inline void rcu_read_unlock(void) { RCU_LOCKDEP_WARN(!rcu_is_watching(), "rcu_read_unlock() used illegally while idle"); + rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ __release(RCU); __rcu_read_unlock(); - rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ } /** @@ -1059,6 +1090,18 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f) extern int rcu_expedited; extern int rcu_normal; -DEFINE_LOCK_GUARD_0(rcu, rcu_read_lock(), rcu_read_unlock()) +DEFINE_LOCK_GUARD_0(rcu, + do { + rcu_read_lock(); + /* + * sparse doesn't call the cleanup function, + * so just release immediately and don't track + * the context. We don't need to anyway, since + * the whole point of the guard is to not need + * the explicit unlock. + */ + __release(RCU); + } while (0), + rcu_read_unlock()) #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index d07f0848802e..303ab9bee155 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -19,18 +19,18 @@ struct rcu_synchronize { }; void wakeme_after_rcu(struct rcu_head *head); -void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, +void __wait_rcu_gp(bool checktiny, unsigned int state, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array); -#define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ - __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ - __crcu_array, __rs_array); \ +#define _wait_rcu_gp(checktiny, state, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, state, ARRAY_SIZE(__crcu_array), __crcu_array, __rs_array); \ } while (0) -#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) +#define wait_rcu_gp(...) _wait_rcu_gp(false, TASK_UNINTERRUPTIBLE, __VA_ARGS__) +#define wait_rcu_gp_state(state, ...) _wait_rcu_gp(false, state, __VA_ARGS__) /** * synchronize_rcu_mult - Wait concurrently for multiple grace periods @@ -54,7 +54,7 @@ do { \ * grace period. */ #define synchronize_rcu_mult(...) 
\ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), TASK_UNINTERRUPTIBLE, __VA_ARGS__) static inline void cond_resched_rcu(void) { diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 85c6df0d1bef..59b3b752394d 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -136,7 +136,8 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } -static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp) { int old = refcount_read(r); @@ -177,7 +178,8 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } -static inline void __refcount_add(int i, refcount_t *r, int *oldp) +static inline __signed_wrap +void __refcount_add(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_add_relaxed(i, &r->refs); @@ -256,7 +258,8 @@ static inline void refcount_inc(refcount_t *r) __refcount_inc(r, NULL); } -static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) +static inline __must_check __signed_wrap +bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) { int old = atomic_fetch_sub_release(i, &r->refs); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c9182a47736e..a6bc2980a98b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -297,15 +297,6 @@ typedef void (*regmap_unlock)(void *); * performed on such table (a register is no increment * readable if it belongs to one of the ranges specified * by rd_noinc_table). - * @disable_locking: This regmap is either protected by external means or - * is guaranteed not to be accessed from multiple threads. - * Don't use any locking mechanisms. - * @lock: Optional lock callback (overrides regmap's default lock - * function, based on spinlock or mutex). - * @unlock: As above for unlocking. - * @lock_arg: this field is passed as the only argument of lock/unlock - * functions (ignored in case regular lock/unlock functions - * are not overridden). * @reg_read: Optional callback that if filled will be used to perform * all the reads from the registers. Should only be provided for * devices whose read operation cannot be represented as a simple @@ -323,6 +314,7 @@ typedef void (*regmap_unlock)(void *); * @write: Same as above for writing. * @max_raw_read: Max raw read size that can be used on the device. * @max_raw_write: Max raw write size that can be used on the device. + * @can_sleep: Optional, specifies whether regmap operations can sleep. * @fast_io: Register IO is fast. Use a spinlock instead of a mutex * to perform locking. This field is ignored if custom lock/unlock * functions are used (see fields lock/unlock of struct regmap_config). @@ -331,7 +323,20 @@ typedef void (*regmap_unlock)(void *); * Use it only for "no-bus" cases. * @io_port: Support IO port accessors. Makes sense only when MMIO vs. IO port * access can be distinguished. + * @disable_locking: This regmap is either protected by external means or + * is guaranteed not to be accessed from multiple threads. + * Don't use any locking mechanisms. + * @lock: Optional lock callback (overrides regmap's default lock + * function, based on spinlock or mutex). + * @unlock: As above for unlocking. 
+ * @lock_arg: This field is passed as the only argument of lock/unlock + * functions (ignored in case regular lock/unlock functions + * are not overridden). * @max_register: Optional, specifies the maximum valid register address. + * @max_register_is_0: Optional, specifies that zero value in @max_register + * should be taken into account. This is a workaround to + * apply handling of @max_register for regmap that contains + * only one register. * @wr_table: Optional, points to a struct regmap_access_table specifying * valid ranges for write access. * @rd_table: As above, for read access. @@ -369,21 +374,20 @@ typedef void (*regmap_unlock)(void *); * @reg_defaults_raw: Power on reset values for registers (for use with * register cache support). * @num_reg_defaults_raw: Number of elements in reg_defaults_raw. - * @reg_format_endian: Endianness for formatted register addresses. If this is - * DEFAULT, the @reg_format_endian_default value from the - * regmap bus is used. - * @val_format_endian: Endianness for formatted register values. If this is - * DEFAULT, the @reg_format_endian_default value from the - * regmap bus is used. - * - * @ranges: Array of configuration entries for virtual address ranges. - * @num_ranges: Number of range configuration entries. * @use_hwlock: Indicate if a hardware spinlock should be used. * @use_raw_spinlock: Indicate if a raw spinlock should be used. * @hwlock_id: Specify the hardware spinlock id. * @hwlock_mode: The hardware spinlock mode, should be HWLOCK_IRQSTATE, * HWLOCK_IRQ or 0. - * @can_sleep: Optional, specifies whether regmap operations can sleep. + * @reg_format_endian: Endianness for formatted register addresses. If this is + * DEFAULT, the @reg_format_endian_default value from the + * regmap bus is used. + * @val_format_endian: Endianness for formatted register values. If this is + * DEFAULT, the @reg_format_endian_default value from the + * regmap bus is used. + * + * @ranges: Array of configuration entries for virtual address ranges. + * @num_ranges: Number of range configuration entries. 
*/ struct regmap_config { const char *name; @@ -402,11 +406,6 @@ struct regmap_config { bool (*writeable_noinc_reg)(struct device *dev, unsigned int reg); bool (*readable_noinc_reg)(struct device *dev, unsigned int reg); - bool disable_locking; - regmap_lock lock; - regmap_unlock unlock; - void *lock_arg; - int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); int (*reg_update_bits)(void *context, unsigned int reg, @@ -418,10 +417,18 @@ struct regmap_config { size_t max_raw_read; size_t max_raw_write; + bool can_sleep; + bool fast_io; bool io_port; + bool disable_locking; + regmap_lock lock; + regmap_unlock unlock; + void *lock_arg; + unsigned int max_register; + bool max_register_is_0; const struct regmap_access_table *wr_table; const struct regmap_access_table *rd_table; const struct regmap_access_table *volatile_table; @@ -443,18 +450,16 @@ struct regmap_config { bool use_relaxed_mmio; bool can_multi_write; - enum regmap_endian reg_format_endian; - enum regmap_endian val_format_endian; - - const struct regmap_range_cfg *ranges; - unsigned int num_ranges; - bool use_hwlock; bool use_raw_spinlock; unsigned int hwlock_id; unsigned int hwlock_mode; - bool can_sleep; + enum regmap_endian reg_format_endian; + enum regmap_endian val_format_endian; + + const struct regmap_range_cfg *ranges; + unsigned int num_ranges; }; /** @@ -1225,6 +1230,7 @@ int regmap_multi_reg_write_bypassed(struct regmap *map, int regmap_raw_write_async(struct regmap *map, unsigned int reg, const void *val, size_t val_len); int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val); +int regmap_read_bypassed(struct regmap *map, unsigned int reg, unsigned int *val); int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_noinc_read(struct regmap *map, unsigned int reg, @@ -1734,6 +1740,13 @@ static inline int regmap_read(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_read_bypassed(struct regmap *map, unsigned int reg, + unsigned int *val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t val_len) { diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 4660582a3302..59d0b9a79e6e 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -164,6 +164,7 @@ struct regulator *__must_check devm_regulator_get_optional(struct device *dev, const char *id); int devm_regulator_get_enable(struct device *dev, const char *id); int devm_regulator_get_enable_optional(struct device *dev, const char *id); +int devm_regulator_get_enable_read_voltage(struct device *dev, const char *id); void regulator_put(struct regulator *regulator); void devm_regulator_put(struct regulator *regulator); @@ -320,12 +321,18 @@ devm_regulator_get_exclusive(struct device *dev, const char *id) static inline int devm_regulator_get_enable(struct device *dev, const char *id) { - return -ENODEV; + return 0; } static inline int devm_regulator_get_enable_optional(struct device *dev, const char *id) { + return 0; +} + +static inline int devm_regulator_get_enable_read_voltage(struct device *dev, + const char *id) +{ return -ENODEV; } diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 22a07c0900a4..f230a472ccd3 100644 --- a/include/linux/regulator/driver.h +++ 
b/include/linux/regulator/driver.h @@ -299,6 +299,8 @@ enum regulator_type { * @vsel_range_reg: Register for range selector when using pickable ranges * and ``regulator_map_*_voltage_*_pickable`` functions. * @vsel_range_mask: Mask for register bitfield used for range selector + * @range_applied_by_vsel: A flag to indicate that changes to vsel_range_reg + * are only effective after vsel_reg is written * @vsel_reg: Register for selector when using ``regulator_map_*_voltage_*`` * @vsel_mask: Mask for register bitfield used for selector * @vsel_step: Specify the resolution of selector stepping when setting @@ -389,6 +391,7 @@ struct regulator_desc { unsigned int vsel_range_reg; unsigned int vsel_range_mask; + bool range_applied_by_vsel; unsigned int vsel_reg; unsigned int vsel_mask; unsigned int vsel_step; diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h index 8313e7ed6aec..a225e9eeb30d 100644 --- a/include/linux/regulator/max8973-regulator.h +++ b/include/linux/regulator/max8973-regulator.h @@ -48,10 +48,6 @@ * control signal from EN input pin. If it is false then * voltage output will be enabled/disabled through EN bit of * device register. - * @enable_gpio: Enable GPIO. If EN pin is controlled through GPIO from host - * then GPIO number can be provided. If no GPIO controlled then - * it should be -1. - * @dvs_gpio: GPIO for dvs. It should be -1 if this is tied with fixed logic. * @dvs_def_state: Default state of dvs. 1 if it is high else 0. */ struct max8973_regulator_platform_data { @@ -59,8 +55,6 @@ struct max8973_regulator_platform_data { unsigned long control_flags; unsigned long junction_temp_warning; bool enable_ext_control; - int enable_gpio; - int dvs_gpio; unsigned dvs_def_state:1; }; diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 505c908dbb81..243633c8dceb 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -9,6 +9,7 @@ enum pca9450_chip_type { PCA9450_TYPE_PCA9450A = 0, PCA9450_TYPE_PCA9450BC, + PCA9450_TYPE_PCA9451A, PCA9450_TYPE_AMOUNT, }; diff --git a/include/linux/remoteproc/mtk_scp.h b/include/linux/remoteproc/mtk_scp.h index 7c2b7cc9fe6c..344ff41c22c7 100644 --- a/include/linux/remoteproc/mtk_scp.h +++ b/include/linux/remoteproc/mtk_scp.h @@ -43,6 +43,7 @@ enum scp_ipi_id { SCP_IPI_CROS_HOST_CMD, SCP_IPI_VDEC_LAT, SCP_IPI_VDEC_CORE, + SCP_IPI_IMGSYS_CMD, SCP_IPI_NS_SERVICE = 0xFF, SCP_IPI_MAX = 0x100, }; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 66942d7fba7f..a365f67131ec 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,12 @@ #include <linux/list.h> #include <linux/pid.h> +/* CLOSID, RMID value used by the default control group */ +#define RESCTRL_RESERVED_CLOSID 0 +#define RESCTRL_RESERVED_RMID 0 + +#define RESCTRL_PICK_ANY_CPU -1 + #ifdef CONFIG_PROC_CPU_RESCTRL int proc_resctrl_show(struct seq_file *m, @@ -153,7 +159,7 @@ struct resctrl_schema; * @cache_level: Which cache level defines scope of this resource * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: All domains for this resource + * @domains: RCU list of all domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. 
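devm_regulator_get_enable_read_voltage(), added above, rolls the usual get / enable / get_voltage sequence into one devm-managed call that returns the voltage or an error code. A sketch of the intended probe-time use, assuming an illustrative "vref" supply:

	#include <linux/platform_device.h>
	#include <linux/regulator/consumer.h>

	static int example_probe(struct platform_device *pdev)
	{
		struct device *dev = &pdev->dev;
		int uv;

		uv = devm_regulator_get_enable_read_voltage(dev, "vref");
		if (uv < 0)
			return dev_err_probe(dev, uv, "no usable vref supply\n");

		/* uv is in microvolts; the supply stays enabled until unbind. */
		return 0;
	}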
@@ -219,36 +225,70 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_online_cpu(unsigned int cpu); +void resctrl_offline_cpu(unsigned int cpu); /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. * @d: domain that the counter should be read from. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid + * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. * @val: result of the counter read in bytes. + * @arch_mon_ctx: An architecture specific value from + * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies + * the hardware monitor allocated for this read request. * - * Call from process context on a CPU that belongs to domain @d. + * Some architectures need to sleep when first programming some of the counters. + * (specifically: arm64's MPAM cache occupancy counters can return 'not ready' + * for a short period of time). Call from a non-migrateable process context on + * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or + * schedule_work_on(). This function can be called with interrupts masked, + * e.g. using smp_call_function_any(), but may consistently return an error. * * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid, u64 *val); + u32 closid, u32 rmid, enum resctrl_event_id eventid, + u64 *val, void *arch_mon_ctx); + +/** + * resctrl_arch_rmid_read_context_check() - warn about invalid contexts + * + * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when + * resctrl_arch_rmid_read() is called with preemption disabled. + * + * The contract with resctrl_arch_rmid_read() is that if interrupts + * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an + * IPI, (and fail if the call needed to sleep), while most of the time + * the work is scheduled, allowing the call to sleep. + */ +static inline void resctrl_arch_rmid_read_context_check(void) +{ + if (!irqs_disabled()) + might_sleep(); +} /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. * @r: The domain's resource. * @d: The rmid's domain. + * @closid: closid that matches the rmid. Depending on the architecture, the + * counter may match traffic of both @closid and @rmid, or @rmid only. * @rmid: The rmid whose counter values should be reset. * @eventid: The eventid whose counter values should be reset. * * This can be called from any CPU. 
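The reworked resctrl_arch_rmid_read() may sleep while a hardware monitor becomes ready, so the documented pattern is to run it from process context on a CPU inside the domain rather than from an IPI. A sketch of that pattern; the wrapper struct and helper names (__rmid_read_arg, __do_rmid_read) are illustrative:

	struct __rmid_read_arg {
		struct rdt_resource *r;
		struct rdt_domain *d;
		u32 closid, rmid;
		enum resctrl_event_id evtid;
		void *arch_mon_ctx;
		u64 val;
	};

	static int __do_rmid_read(void *p)
	{
		struct __rmid_read_arg *a = p;

		/* Process context on the target CPU: sleeping is allowed here. */
		return resctrl_arch_rmid_read(a->r, a->d, a->closid, a->rmid,
					      a->evtid, &a->val, a->arch_mon_ctx);
	}

	/* Caller: run the read on any CPU belonging to the domain. */
	/* err = smp_call_on_cpu(cpumask_any(&d->cpu_mask), __do_rmid_read, &arg, false); */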
*/ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, - u32 rmid, enum resctrl_event_id eventid); + u32 closid, u32 rmid, + enum resctrl_event_id eventid); /** * resctrl_arch_reset_rmid_all() - Reset all private state associated with diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index 0fa4f60e1186..357df16ede32 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -60,6 +60,9 @@ struct reset_control_lookup { * @reset_control_head: head of internal list of requested reset controls * @dev: corresponding driver model device struct * @of_node: corresponding device tree node as phandle target + * @of_args: for reset-gpios controllers: corresponding phandle args with + * of_node and GPIO number complementing of_node; either this or + * of_node should be present * @of_reset_n_cells: number of cells in reset line specifiers * @of_xlate: translation function to translate from specifier as found in the * device tree to id as given to the reset control ops, defaults @@ -73,6 +76,7 @@ struct reset_controller_dev { struct list_head reset_control_head; struct device *dev; struct device_node *of_node; + const struct of_phandle_args *of_args; int of_reset_n_cells; int (*of_xlate)(struct reset_controller_dev *rcdev, const struct of_phandle_args *reset_spec); diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index b6f3797277ff..015c8298bebc 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -9,6 +9,7 @@ #ifndef _LINUX_RHASHTABLE_TYPES_H #define _LINUX_RHASHTABLE_TYPES_H +#include <linux/alloc_tag.h> #include <linux/atomic.h> #include <linux/compiler.h> #include <linux/mutex.h> @@ -88,6 +89,9 @@ struct rhashtable { struct mutex mutex; spinlock_t lock; atomic_t nelems; +#ifdef CONFIG_MEM_ALLOC_PROFILING + struct alloc_tag *alloc_tag; +#endif }; /** @@ -127,9 +131,12 @@ struct rhashtable_iter { bool end_of_table; }; -int rhashtable_init(struct rhashtable *ht, +int rhashtable_init_noprof(struct rhashtable *ht, const struct rhashtable_params *params); -int rhltable_init(struct rhltable *hlt, +#define rhashtable_init(...) alloc_hooks(rhashtable_init_noprof(__VA_ARGS__)) + +int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params); +#define rhltable_init(...) alloc_hooks(rhltable_init_noprof(__VA_ARGS__)) #endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5b5357c0bd8c..8463a128e2f4 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -633,7 +633,7 @@ restart: * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. + * for an entry with an identical key. The first matching entry is returned. * * This must only be called under the RCU read lock. * @@ -655,7 +655,7 @@ static inline void *rhashtable_lookup( * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. + * for an entry with an identical key. The first matching entry is returned. * * Only use this function when you have other mechanisms guaranteeing * that the object won't go away after the RCU read lock is released. 
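The rename to rhashtable_init_noprof() with an alloc_hooks() wrapper keeps the public API unchanged while letting CONFIG_MEM_ALLOC_PROFILING attribute the table's memory to its init call site. A sketch with illustrative object and parameter names:

	#include <linux/rhashtable.h>

	struct test_obj {
		u32 key;
		struct rhash_head node;		/* table linkage */
	};

	static const struct rhashtable_params test_params = {
		.key_len     = sizeof(u32),
		.key_offset  = offsetof(struct test_obj, key),
		.head_offset = offsetof(struct test_obj, node),
	};

	static int example_table_init(struct rhashtable *ht)
	{
		/* Expands to alloc_hooks(rhashtable_init_noprof(...)), so this
		 * call site becomes the owner tag for the table's allocations. */
		return rhashtable_init(ht, &test_params);
	}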
@@ -682,7 +682,7 @@ static inline void *rhashtable_lookup_fast( * @params: hash table parameters * * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. All matching entries are returned + * for an entry with an identical key. All matching entries are returned * in a list. * * This must only be called under the RCU read lock. @@ -699,7 +699,7 @@ static inline struct rhlist_head *rhltable_lookup( } /* Internal function, please use rhashtable_insert_fast() instead. This - * function returns the existing element already in hashes in there is a clash, + * function returns the existing element already in hashes if there is a clash, * otherwise it returns an error via ERR_PTR(). */ static inline void *__rhashtable_insert_fast( @@ -1130,7 +1130,7 @@ static inline int rhashtable_remove_fast( * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. + * considerably slower if the hash table is not correctly sized. * * Will automatically shrink the table if permitted when residency drops * below 30% diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fa802db216f9..96d2140b471e 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -6,6 +6,8 @@ #include <linux/seq_file.h> #include <linux/poll.h> +#include <uapi/linux/trace_mmap.h> + struct trace_buffer; struct ring_buffer_iter; @@ -98,7 +100,9 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); +typedef bool (*ring_buffer_cond_fn)(void *data); +int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full, + ring_buffer_cond_fn cond, void *data); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); @@ -221,4 +225,8 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); #define trace_rb_cpu_prepare NULL #endif +int ring_buffer_map(struct trace_buffer *buffer, int cpu, + struct vm_area_struct *vma); +int ring_buffer_unmap(struct trace_buffer *buffer, int cpu); +int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu); #endif /* _LINUX_RING_BUFFER_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b7944a833668..7229b9baf20d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -273,6 +273,7 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio, ClearPageAnonExclusive(&folio->page); } atomic_inc(&folio->_entire_mapcount); + atomic_inc(&folio->_large_mapcount); return 0; } @@ -284,7 +285,7 @@ static inline int hugetlb_try_share_anon_rmap(struct folio *folio) VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio); /* Paired with the memory barrier in try_grab_folio(). */ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + if (IS_ENABLED(CONFIG_HAVE_GUP_FAST)) smp_mb(); if (unlikely(folio_maybe_dma_pinned(folio))) @@ -295,7 +296,7 @@ static inline int hugetlb_try_share_anon_rmap(struct folio *folio) * This is conceptually a smp_wmb() paired with the smp_rmb() in * gup_must_unshare(). 
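ring_buffer_wait() now takes an optional condition callback, letting callers express an extra wake-up criterion instead of open-coding a wait loop around the old call. A hedged sketch of the new convention; the context struct and condition are illustrative, and passing a NULL condition keeps the previous behaviour:

	struct example_wait_ctx {
		bool shutdown;
	};

	static bool example_wake_cond(void *data)
	{
		struct example_wait_ctx *ctx = data;

		/* Returning true ends the wait even if no data arrived. */
		return READ_ONCE(ctx->shutdown);
	}

	/* err = ring_buffer_wait(buffer, cpu, 0, example_wake_cond, &ctx); */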
*/ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + if (IS_ENABLED(CONFIG_HAVE_GUP_FAST)) smp_mb__after_atomic(); return 0; } @@ -306,6 +307,7 @@ static inline void hugetlb_add_file_rmap(struct folio *folio) VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); atomic_inc(&folio->_entire_mapcount); + atomic_inc(&folio->_large_mapcount); } static inline void hugetlb_remove_rmap(struct folio *folio) @@ -313,21 +315,31 @@ static inline void hugetlb_remove_rmap(struct folio *folio) VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); atomic_dec(&folio->_entire_mapcount); + atomic_dec(&folio->_large_mapcount); } static __always_inline void __folio_dup_file_rmap(struct folio *folio, struct page *page, int nr_pages, enum rmap_level level) { + const int orig_nr_pages = nr_pages; + __folio_rmap_sanity_checks(folio, page, nr_pages, level); switch (level) { case RMAP_LEVEL_PTE: + if (!folio_test_large(folio)) { + atomic_inc(&page->_mapcount); + break; + } + do { atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); + atomic_add(orig_nr_pages, &folio->_large_mapcount); break; case RMAP_LEVEL_PMD: atomic_inc(&folio->_entire_mapcount); + atomic_inc(&folio->_large_mapcount); break; } } @@ -347,8 +359,12 @@ static inline void folio_dup_file_rmap_ptes(struct folio *folio, { __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); } -#define folio_dup_file_rmap_pte(folio, page) \ - folio_dup_file_rmap_ptes(folio, page, 1) + +static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, + struct page *page) +{ + __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE); +} /** * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio @@ -373,6 +389,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *src_vma, enum rmap_level level) { + const int orig_nr_pages = nr_pages; bool maybe_pinned; int i; @@ -401,11 +418,20 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, if (PageAnonExclusive(page + i)) return -EBUSY; } + + if (!folio_test_large(folio)) { + if (PageAnonExclusive(page)) + ClearPageAnonExclusive(page); + atomic_inc(&page->_mapcount); + break; + } + do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); + atomic_add(orig_nr_pages, &folio->_large_mapcount); break; case RMAP_LEVEL_PMD: if (PageAnonExclusive(page)) { @@ -414,6 +440,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); + atomic_inc(&folio->_large_mapcount); break; } return 0; @@ -448,8 +475,13 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, RMAP_LEVEL_PTE); } -#define folio_try_dup_anon_rmap_pte(folio, page, vma) \ - folio_try_dup_anon_rmap_ptes(folio, page, 1, vma) + +static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, + struct page *page, struct vm_area_struct *src_vma) +{ + return __folio_try_dup_anon_rmap(folio, page, 1, src_vma, + RMAP_LEVEL_PTE); +} /** * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range @@ -541,7 +573,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, */ /* Paired with the memory barrier in try_grab_folio(). 
*/ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + if (IS_ENABLED(CONFIG_HAVE_GUP_FAST)) smp_mb(); if (unlikely(folio_maybe_dma_pinned(folio))) @@ -552,7 +584,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio, * This is conceptually a smp_wmb() paired with the smp_rmb() in * gup_must_unshare(). */ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) + if (IS_ENABLED(CONFIG_HAVE_GUP_FAST)) smp_mb__after_atomic(); return 0; } @@ -698,7 +730,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); -int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); +unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); /* * rmap_walk_control: To control rmap traversing for specific needs diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 5f8e438a0312..3f4d315aaec9 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -42,7 +42,7 @@ static inline time64_t rtc_tm_sub(struct rtc_time *lhs, struct rtc_time *rhs) #include <linux/timerqueue.h> #include <linux/workqueue.h> -extern struct class *rtc_class; +extern const struct class rtc_class; /* * For these RTC methods the device parameter is the physical device diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 410529fca18b..a7da7dfc06a2 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,6 +7,7 @@ #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> +#include <linux/cleanup.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -46,7 +47,10 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); +DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock()) + extern wait_queue_head_t netdev_unregistering_wq; +extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; @@ -171,4 +175,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } +void netdev_set_operstate(struct net_device *dev, int newstate); + #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include <linux/build_bug.h> +#include <linux/compiler_attributes.h> +#include <uapi/linux/fcntl.h> + +/* Block storage write lifetime hint values. */ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. 
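The DEFINE_LOCK_GUARD_0(rtnl, ...) definition above hooks the RTNL into <linux/cleanup.h>, so rtnl sections can be written in guard style, with the unlock emitted automatically on every exit path. A sketch (function and checks illustrative):

	static int example_op(struct net_device *dev)
	{
		guard(rtnl)();		/* rtnl_lock() now, rtnl_unlock() at scope exit */

		if (!netif_running(dev))
			return -ENETDOWN;	/* no explicit unlock needed */

		return 0;
	}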
*/ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */ diff --git a/include/linux/rwbase_rt.h b/include/linux/rwbase_rt.h index 1d264dd08625..f2394a409c9d 100644 --- a/include/linux/rwbase_rt.h +++ b/include/linux/rwbase_rt.h @@ -26,12 +26,17 @@ struct rwbase_rt { } while (0) -static __always_inline bool rw_base_is_locked(struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb) { return atomic_read(&rwb->readers) != READER_BIAS; } -static __always_inline bool rw_base_is_contended(struct rwbase_rt *rwb) +static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb) +{ + return atomic_read(&rwb->readers) == WRITER_BIAS; +} + +static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb) { return atomic_read(&rwb->readers) > 0; } diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 9c29689ff505..c8b543d428b0 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -66,14 +66,24 @@ struct rw_semaphore { #endif }; -/* In all implementations count != 0 means locked */ +#define RWSEM_UNLOCKED_VALUE 0UL +#define RWSEM_WRITER_LOCKED (1UL << 0) +#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) + static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return atomic_long_read(&sem->count) != 0; + return atomic_long_read(&sem->count) != RWSEM_UNLOCKED_VALUE; } -#define RWSEM_UNLOCKED_VALUE 0L -#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) +static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(atomic_long_read(&sem->count) == RWSEM_UNLOCKED_VALUE); +} + +static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!(atomic_long_read(&sem->count) & RWSEM_WRITER_LOCKED)); +} /* Common initializer macros and functions */ @@ -152,11 +162,21 @@ do { \ __init_rwsem((sem), #sem, &__key); \ } while (0) -static __always_inline int rwsem_is_locked(struct rw_semaphore *sem) +static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem) { return rw_base_is_locked(&sem->rwbase); } +static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!rwsem_is_locked(sem)); +} + +static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem) +{ + WARN_ON(!rw_base_is_write_locked(&sem->rwbase)); +} + static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) { return rw_base_is_contended(&sem->rwbase); @@ -169,6 +189,22 @@ static __always_inline int rwsem_is_contended(struct rw_semaphore *sem) * the RT specific variant. 
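rw_base_is_write_locked() exists to back the new rwsem assertions on PREEMPT_RT, where the rwsem count word is replaced by the rwbase reader count. Together with the nolockdep variants above (and the lockdep-aware wrappers that follow below), a write-side invariant can be asserted cheaply even on !CONFIG_LOCKDEP builds. An illustrative sketch:

	static void example_set_size(struct inode *inode, loff_t size)
	{
		/* Uses lockdep when enabled, otherwise checks the lock word. */
		rwsem_assert_held_write(&inode->i_rwsem);

		i_size_write(inode, size);
	}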
*/ +static inline void rwsem_assert_held(const struct rw_semaphore *sem) +{ + if (IS_ENABLED(CONFIG_LOCKDEP)) + lockdep_assert_held(sem); + else + rwsem_assert_held_nolockdep(sem); +} + +static inline void rwsem_assert_held_write(const struct rw_semaphore *sem) +{ + if (IS_ENABLED(CONFIG_LOCKDEP)) + lockdep_assert_held_write(sem); + else + rwsem_assert_held_write_nolockdep(sem); +} + /* * lock for reading */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ffe8f618ab86..61591ac6eab6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -301,7 +301,7 @@ enum { TASK_COMM_LEN = 16, }; -extern void scheduler_tick(void); +extern void sched_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX @@ -770,6 +770,10 @@ struct task_struct { unsigned int flags; unsigned int ptrace; +#ifdef CONFIG_MEM_ALLOC_PROFILING + struct alloc_tag *alloc_tag; +#endif + #ifdef CONFIG_SMP int on_cpu; struct __call_single_node wake_entry; @@ -810,6 +814,7 @@ struct task_struct { struct task_group *sched_task_group; #endif + #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. @@ -835,6 +840,7 @@ struct task_struct { #endif unsigned int policy; + unsigned long max_allowed_capacity; int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t *user_cpus_ptr; @@ -858,6 +864,8 @@ struct task_struct { u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_exit_cpu; + struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU @@ -1259,6 +1267,7 @@ struct task_struct { /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; + u8 il_weight; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING @@ -1440,8 +1449,6 @@ struct task_struct { #ifdef CONFIG_MEMCG struct mem_cgroup *memcg_in_oom; - gfp_t memcg_oom_gfp_mask; - int memcg_oom_order; /* Number of pages to reclaim on returning to userland: */ unsigned int memcg_nr_pages_over_high; @@ -1623,26 +1630,27 @@ extern struct pid *cad_pid; #define PF_SUPERPRIV 0x00000100 /* Used super-user privileges */ #define PF_DUMPCORE 0x00000200 /* Dumped core */ #define PF_SIGNALED 0x00000400 /* Killed by a signal */ -#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory to free memory. See memalloc_noreclaim_save() */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF__HOLE__00010000 0x00010000 #define PF_KSWAPD 0x00020000 /* I am kswapd */ -#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ -#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ +#define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ #define PF_LOCAL_THROTTLE 0x00100000 /* Throttle writes only against the bdi I write to, * I am cleaning dirty pages from some other bdi. 
*/ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ -#define PF__HOLE__00800000 0x00800000 -#define PF__HOLE__01000000 0x01000000 +#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */ +#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */ #define PF__HOLE__02000000 0x02000000 #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */ -#define PF__HOLE__20000000 0x20000000 +#define PF_MEMALLOC_PIN 0x10000000 /* Allocations constrained to zones which allow long term pinning. + * See memalloc_pin_save() */ +#define PF_BLOCK_TS 0x20000000 /* plug has ts that needs updating */ #define PF__HOLE__40000000 0x40000000 #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ @@ -2183,4 +2191,23 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); +#ifdef CONFIG_MEM_ALLOC_PROFILING +static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +{ + swap(current->alloc_tag, tag); + return tag; +} + +static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +{ +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); +#endif + current->alloc_tag = old; +} +#else +#define alloc_tag_save(_tag) NULL +#define alloc_tag_restore(_tag, _old) do {} while (0) +#endif + #endif diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 02f5090ffea2..e62ff805cfc9 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -92,9 +92,12 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_VM_MERGE_ANY 30 #define MMF_VM_MERGE_ANY_MASK (1 << MMF_VM_MERGE_ANY) +#define MMF_TOPDOWN 31 /* mm searches top down by default */ +#define MMF_TOPDOWN_MASK (1 << MMF_TOPDOWN) + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ - MMF_VM_MERGE_ANY_MASK) + MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) static inline unsigned long mmf_init_flags(unsigned long flags) { diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index 478084f9105e..e670ac282333 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -5,8 +5,8 @@ #include <linux/sched.h> enum cpu_idle_type { + __CPU_NOT_IDLE = 0, CPU_IDLE, - CPU_NOT_IDLE, CPU_NEWLY_IDLE, CPU_MAX_IDLE_TYPES }; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 9a19f1b42f64..91546493c43d 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -8,6 +8,7 @@ #include <linux/mm_types.h> #include <linux/gfp.h> #include <linux/sync_core.h> +#include <linux/sched/coredump.h> /* * Routines for handling mm_structs @@ -186,6 +187,27 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); +unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *filp, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags); + +unsigned long +arch_get_unmapped_area_vmflags(struct file *filp, unsigned long addr, + 
unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t vm_flags); +unsigned long +arch_get_unmapped_area_topdown_vmflags(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags, vm_flags_t); + +unsigned long mm_get_unmapped_area_vmflags(struct mm_struct *mm, + struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags, + vm_flags_t vm_flags); + unsigned long generic_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -236,16 +258,25 @@ static inline gfp_t current_gfp_context(gfp_t flags) { unsigned int pflags = READ_ONCE(current->flags); - if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) { + if (unlikely(pflags & (PF_MEMALLOC_NOIO | + PF_MEMALLOC_NOFS | + PF_MEMALLOC_NORECLAIM | + PF_MEMALLOC_NOWARN | + PF_MEMALLOC_PIN))) { /* - * NOIO implies both NOIO and NOFS and it is a weaker context - * so always make sure it makes precedence + * Stronger flags before weaker flags: + * NORECLAIM implies NOIO, which in turn implies NOFS */ - if (pflags & PF_MEMALLOC_NOIO) + if (pflags & PF_MEMALLOC_NORECLAIM) + flags &= ~__GFP_DIRECT_RECLAIM; + else if (pflags & PF_MEMALLOC_NOIO) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS; + if (pflags & PF_MEMALLOC_NOWARN) + flags |= __GFP_NOWARN; + if (pflags & PF_MEMALLOC_PIN) flags &= ~__GFP_MOVABLE; } @@ -307,6 +338,24 @@ static inline void might_alloc(gfp_t gfp_mask) } /** + * memalloc_flags_save - Add a PF_* flag to current->flags, save old value + * + * This allows PF_* flags to be conveniently added, irrespective of current + * value, and then the old version restored with memalloc_flags_restore(). + */ +static inline unsigned memalloc_flags_save(unsigned flags) +{ + unsigned oldflags = ~current->flags & flags; + current->flags |= flags; + return oldflags; +} + +static inline void memalloc_flags_restore(unsigned flags) +{ + current->flags &= ~flags; +} + +/** * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope. * * This functions marks the beginning of the GFP_NOIO allocation scope. @@ -315,13 +364,12 @@ static inline void might_alloc(gfp_t gfp_mask) * point of view. Use memalloc_noio_restore to end the scope with flags * returned by this function. * - * This function is safe to be used from any context. + * Context: This function is safe to be used from any context. + * Return: The saved flags to be passed to memalloc_noio_restore. */ static inline unsigned int memalloc_noio_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOIO; - current->flags |= PF_MEMALLOC_NOIO; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOIO); } /** @@ -334,7 +382,7 @@ static inline unsigned int memalloc_noio_save(void) */ static inline void memalloc_noio_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; + memalloc_flags_restore(flags); } /** @@ -346,13 +394,12 @@ static inline void memalloc_noio_restore(unsigned int flags) * point of view. Use memalloc_nofs_restore to end the scope with flags * returned by this function. * - * This function is safe to be used from any context. + * Context: This function is safe to be used from any context. + * Return: The saved flags to be passed to memalloc_nofs_restore. 
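All of the scope helpers now funnel through memalloc_flags_save()/memalloc_flags_restore(): save returns only the bits that were newly set, and restore clears exactly those bits, which makes nesting safe. The canonical pairing, sketched for the NOIO scope with an illustrative helper:

	static void example_reset(struct device *dev)
	{
		unsigned int noio_flags;

		noio_flags = memalloc_noio_save();
		/* Every allocation here implicitly behaves as GFP_NOIO. */
		do_device_reset(dev);			/* illustrative */
		memalloc_noio_restore(noio_flags);
	}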
*/ static inline unsigned int memalloc_nofs_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_NOFS; - current->flags |= PF_MEMALLOC_NOFS; - return flags; + return memalloc_flags_save(PF_MEMALLOC_NOFS); } /** @@ -365,32 +412,76 @@ static inline unsigned int memalloc_nofs_save(void) */ static inline void memalloc_nofs_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; + memalloc_flags_restore(flags); } +/** + * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope. + * + * This function marks the beginning of the __GFP_MEMALLOC allocation scope. + * All further allocations will implicitly add the __GFP_MEMALLOC flag, which + * prevents entering reclaim and allows access to all memory reserves. This + * should only be used when the caller guarantees the allocation will allow more + * memory to be freed very shortly, i.e. it needs to allocate some memory in + * the process of freeing memory, and cannot reclaim due to potential recursion. + * + * Users of this scope have to be extremely careful to not deplete the reserves + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. Usage of a + * pre-allocated pool (e.g. mempool) should always be considered before using + * this scope. + * + * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC. + * + * Context: This function should not be used in an interrupt context, as interrupt + * context does not give PF_MEMALLOC access to reserves. + * See __gfp_pfmemalloc_flags(). + * Return: The saved flags to be passed to memalloc_noreclaim_restore. + */ static inline unsigned int memalloc_noreclaim_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC; - current->flags |= PF_MEMALLOC; - return flags; + return memalloc_flags_save(PF_MEMALLOC); } +/** + * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope. + * @flags: Flags to restore. + * + * Ends the implicit __GFP_MEMALLOC scope started by the memalloc_noreclaim_save + * function. Always make sure that the given flags value is the return value from + * the pairing memalloc_noreclaim_save call. + */ static inline void memalloc_noreclaim_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC) | flags; + memalloc_flags_restore(flags); } +/** + * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope. + * + * This function marks the beginning of the ~__GFP_MOVABLE allocation scope. + * All further allocations will implicitly remove the __GFP_MOVABLE flag, which + * will constrain the allocations to zones that allow long term pinning, i.e. + * not ZONE_MOVABLE zones. + * + * Return: The saved flags to be passed to memalloc_pin_restore. + */ static inline unsigned int memalloc_pin_save(void) { - unsigned int flags = current->flags & PF_MEMALLOC_PIN; - - current->flags |= PF_MEMALLOC_PIN; - return flags; + return memalloc_flags_save(PF_MEMALLOC_PIN); } +/** + * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope. + * @flags: Flags to restore. + * + * Ends the implicit ~__GFP_MOVABLE scope started by the memalloc_pin_save function. + * Always make sure that the given flags value is the return value from the pairing + * memalloc_pin_save call.
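The generic helper also makes the new PF_MEMALLOC_NOWARN scope (added in sched.h above) directly usable for best-effort allocations that should not spam the log on failure. A sketch, with the function name illustrative:

	static void *example_try_alloc(size_t size)
	{
		unsigned int pf_flags;
		void *buf;

		pf_flags = memalloc_flags_save(PF_MEMALLOC_NOWARN);
		/* current_gfp_context() ORs __GFP_NOWARN into this allocation. */
		buf = kmalloc(size, GFP_KERNEL);
		memalloc_flags_restore(pf_flags);

		return buf;	/* may be NULL; the caller is expected to cope */
	}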
+ */ static inline void memalloc_pin_restore(unsigned int flags) { - current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags; + memalloc_flags_restore(flags); } #ifdef CONFIG_MEMCG diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index a8b28647aafc..b04a5d04dee9 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -117,13 +117,13 @@ SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS) /* - * Domain members share CPU package resources (i.e. caches) + * Domain members share CPU Last Level Caches * * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share * the same cache(s). * NEEDS_GROUPS: Caches are shared between groups. */ -SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) +SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) /* * Only a single load balancing instance diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 4b7664c56208..0a0e23c45406 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -735,8 +735,6 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern bool thread_group_exited(struct pid *pid); - extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, unsigned long *flags); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index a6e04b4a21d7..4237daa5ac7a 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[]; #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { - return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_CLUSTER static inline int cpu_cluster_flags(void) { - return SD_CLUSTER | SD_SHARE_PKG_RESOURCES; + return SD_CLUSTER | SD_SHARE_LLC; } #endif #ifdef CONFIG_SCHED_MC static inline int cpu_core_flags(void) { - return SD_SHARE_PKG_RESOURCES; + return SD_SHARE_LLC; } #endif @@ -110,7 +110,7 @@ struct sched_domain { unsigned long last_decay_max_lb_cost; #ifdef CONFIG_SCHEDSTATS - /* load_balance() stats */ + /* sched_balance_rq() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; @@ -176,6 +176,7 @@ extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], cpumask_var_t *alloc_sched_domains(unsigned int ndoms); void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); +bool cpus_equal_capacity(int this_cpu, int that_cpu); bool cpus_share_cache(int this_cpu, int that_cpu); bool cpus_share_resources(int this_cpu, int that_cpu); @@ -226,6 +227,11 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], { } +static inline bool cpus_equal_capacity(int this_cpu, int that_cpu) +{ + return true; +} + static inline bool cpus_share_cache(int this_cpu, int that_cpu) { return true; @@ -264,17 +270,17 @@ unsigned long arch_scale_cpu_capacity(int cpu) } #endif -#ifndef arch_scale_thermal_pressure +#ifndef arch_scale_hw_pressure static __always_inline -unsigned long arch_scale_thermal_pressure(int cpu) +unsigned long arch_scale_hw_pressure(int cpu) { return 0; } #endif -#ifndef arch_update_thermal_pressure +#ifndef arch_update_hw_pressure static __always_inline -void arch_update_thermal_pressure(const struct cpumask 
*cpus, +void arch_update_hw_pressure(const struct cpumask *cpus, unsigned long capped_frequency) { } #endif diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index bc60243d43b3..25446c5d3508 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -4,7 +4,8 @@ struct vhost_task; -struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), + void (*handle_kill)(void *), void *arg, const char *name); void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index f2f05fb42d28..3a9bb5b9a9e8 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -47,6 +47,10 @@ struct scmi_clock_info { bool rate_discrete; bool rate_changed_notifications; bool rate_change_requested_notifications; + bool state_ctrl_forbidden; + bool rate_ctrl_forbidden; + bool parent_ctrl_forbidden; + bool extended_config; union { struct { int num_rates; @@ -72,6 +76,13 @@ struct scmi_handle; struct scmi_device; struct scmi_protocol_handle; +enum scmi_clock_oem_config { + SCMI_CLOCK_CFG_DUTY_CYCLE = 0x1, + SCMI_CLOCK_CFG_PHASE, + SCMI_CLOCK_CFG_OEM_START = 0x80, + SCMI_CLOCK_CFG_OEM_END = 0xFF, +}; + /** * struct scmi_clk_proto_ops - represents the various operations provided * by SCMI Clock Protocol @@ -104,10 +115,11 @@ struct scmi_clk_proto_ops { int (*state_get)(const struct scmi_protocol_handle *ph, u32 clk_id, bool *enabled, bool atomic); int (*config_oem_get)(const struct scmi_protocol_handle *ph, u32 clk_id, - u8 oem_type, u32 *oem_val, u32 *attributes, - bool atomic); + enum scmi_clock_oem_config oem_type, + u32 *oem_val, u32 *attributes, bool atomic); int (*config_oem_set)(const struct scmi_protocol_handle *ph, u32 clk_id, - u8 oem_type, u32 oem_val, bool atomic); + enum scmi_clock_oem_config oem_type, + u32 oem_val, bool atomic); int (*parent_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 *parent_id); int (*parent_set)(const struct scmi_protocol_handle *ph, u32 clk_id, u32 parent_id); }; @@ -128,6 +140,8 @@ struct scmi_perf_domain_info { * @level_set: sets the performance level of a domain * @level_get: gets the performance level of a domain * @transition_latency_get: gets the DVFS transition latency for a given device + * @rate_limit_get: gets the minimum time (us) required between successive + * requests * @device_opps_add: adds all the OPPs for a given device * @freq_set: sets the frequency for a given device using sustained frequency * to sustained performance level mapping @@ -137,6 +151,8 @@ struct scmi_perf_domain_info { * at a given frequency * @fast_switch_possible: indicates if fast DVFS switching is possible or not * for a given device + * @fast_switch_rate_limit: gets the minimum time (us) required between + * successive fast_switching requests * @power_scale_mw_get: indicates if the power values provided are in milliWatts * or in some other (abstract) scale */ @@ -154,6 +170,8 @@ struct scmi_perf_proto_ops { u32 *level, bool poll); int (*transition_latency_get)(const struct scmi_protocol_handle *ph, u32 domain); + int (*rate_limit_get)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); int (*device_opps_add)(const struct scmi_protocol_handle *ph, struct device *dev, u32 domain); int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain, @@ -164,6 +182,8 @@ struct scmi_perf_proto_ops { unsigned long 
*rate, unsigned long *power); bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph, u32 domain); + int (*fast_switch_rate_limit)(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit); enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph); }; @@ -717,6 +737,89 @@ struct scmi_powercap_proto_ops { u32 *power_thresh_high); }; +enum scmi_pinctrl_selector_type { + PIN_TYPE = 0, + GROUP_TYPE, + FUNCTION_TYPE, +}; + +enum scmi_pinctrl_conf_type { + SCMI_PIN_DEFAULT = 0, + SCMI_PIN_BIAS_BUS_HOLD = 1, + SCMI_PIN_BIAS_DISABLE = 2, + SCMI_PIN_BIAS_HIGH_IMPEDANCE = 3, + SCMI_PIN_BIAS_PULL_UP = 4, + SCMI_PIN_BIAS_PULL_DEFAULT = 5, + SCMI_PIN_BIAS_PULL_DOWN = 6, + SCMI_PIN_DRIVE_OPEN_DRAIN = 7, + SCMI_PIN_DRIVE_OPEN_SOURCE = 8, + SCMI_PIN_DRIVE_PUSH_PULL = 9, + SCMI_PIN_DRIVE_STRENGTH = 10, + SCMI_PIN_INPUT_DEBOUNCE = 11, + SCMI_PIN_INPUT_MODE = 12, + SCMI_PIN_PULL_MODE = 13, + SCMI_PIN_INPUT_VALUE = 14, + SCMI_PIN_INPUT_SCHMITT = 15, + SCMI_PIN_LOW_POWER_MODE = 16, + SCMI_PIN_OUTPUT_MODE = 17, + SCMI_PIN_OUTPUT_VALUE = 18, + SCMI_PIN_POWER_SOURCE = 19, + SCMI_PIN_SLEW_RATE = 20, + SCMI_PIN_OEM_START = 192, + SCMI_PIN_OEM_END = 255, +}; + +/** + * struct scmi_pinctrl_proto_ops - represents the various operations provided + * by SCMI Pinctrl Protocol + * + * @count_get: returns count of the registered elements in given type + * @name_get: returns name by index of given type + * @group_pins_get: returns the set of pins, assigned to the specified group + * @function_groups_get: returns the set of groups, assigned to the specified + * function + * @mux_set: set muxing function for groups of pins + * @settings_get_one: returns one configuration parameter for pin or group + * specified by config_type + * @settings_get_all: returns all configuration parameters for pin or group + * @settings_conf: sets the configuration parameter for pin or group + * @pin_request: acquire a pin before selecting a mux setting + * @pin_free: frees a pin acquired by a pin_request call + */ +struct scmi_pinctrl_proto_ops { + int (*count_get)(const struct scmi_protocol_handle *ph, + enum scmi_pinctrl_selector_type type); + int (*name_get)(const struct scmi_protocol_handle *ph, u32 selector, + enum scmi_pinctrl_selector_type type, + const char **name); + int (*group_pins_get)(const struct scmi_protocol_handle *ph, + u32 selector, const unsigned int **pins, + unsigned int *nr_pins); + int (*function_groups_get)(const struct scmi_protocol_handle *ph, + u32 selector, unsigned int *nr_groups, + const unsigned int **groups); + int (*mux_set)(const struct scmi_protocol_handle *ph, u32 selector, + u32 group); + int (*settings_get_one)(const struct scmi_protocol_handle *ph, + u32 selector, + enum scmi_pinctrl_selector_type type, + enum scmi_pinctrl_conf_type config_type, + u32 *config_value); + int (*settings_get_all)(const struct scmi_protocol_handle *ph, + u32 selector, + enum scmi_pinctrl_selector_type type, + unsigned int *nr_configs, + enum scmi_pinctrl_conf_type *config_types, + u32 *config_values); + int (*settings_conf)(const struct scmi_protocol_handle *ph, + u32 selector, enum scmi_pinctrl_selector_type type, + unsigned int nr_configs, + enum scmi_pinctrl_conf_type *config_type, + u32 *config_value); + int (*pin_request)(const struct scmi_protocol_handle *ph, u32 pin); + int (*pin_free)(const struct scmi_protocol_handle *ph, u32 pin); +}; + /** * struct scmi_notify_ops - represents notifications' operations provided by * SCMI core @@ -763,8 +866,6 @@ struct scmi_notify_ops { const u32
*src_id, struct notifier_block *nb); int (*devm_event_notifier_unregister)(struct scmi_device *sdev, - u8 proto_id, u8 evt_id, - const u32 *src_id, struct notifier_block *nb); int (*event_notifier_register)(const struct scmi_handle *handle, u8 proto_id, u8 evt_id, @@ -824,6 +925,7 @@ enum scmi_std_protocol { SCMI_PROTOCOL_RESET = 0x16, SCMI_PROTOCOL_VOLTAGE = 0x17, SCMI_PROTOCOL_POWERCAP = 0x18, + SCMI_PROTOCOL_PINCTRL = 0x19, }; enum scmi_system_events { @@ -953,6 +1055,8 @@ struct scmi_perf_limits_report { unsigned int domain_id; unsigned int range_max; unsigned int range_min; + unsigned long range_max_freq; + unsigned long range_min_freq; }; struct scmi_perf_level_report { @@ -960,6 +1064,7 @@ struct scmi_perf_level_report { unsigned int agent_id; unsigned int domain_id; unsigned int performance_level; + unsigned long performance_level_freq; }; struct scmi_sensor_trip_point_report { diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index eab7081392d5..75303c126285 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -4,6 +4,132 @@ #include <uapi/linux/screen_info.h> +#include <linux/bits.h> + +/** + * SCREEN_INFO_MAX_RESOURCES - maximum number of resources per screen_info + */ +#define SCREEN_INFO_MAX_RESOURCES 3 + +struct pci_dev; +struct resource; + +static inline bool __screen_info_has_lfb(unsigned int type) +{ + return (type == VIDEO_TYPE_VLFB) || (type == VIDEO_TYPE_EFI); +} + +static inline u64 __screen_info_lfb_base(const struct screen_info *si) +{ + u64 lfb_base = si->lfb_base; + + if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) + lfb_base |= (u64)si->ext_lfb_base << 32; + + return lfb_base; +} + +static inline void __screen_info_set_lfb_base(struct screen_info *si, u64 lfb_base) +{ + si->lfb_base = lfb_base & GENMASK_ULL(31, 0); + si->ext_lfb_base = (lfb_base & GENMASK_ULL(63, 32)) >> 32; + + if (si->ext_lfb_base) + si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + else + si->capabilities &= ~VIDEO_CAPABILITY_64BIT_BASE; +} + +static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned int type) +{ + u64 lfb_size = si->lfb_size; + + if (type == VIDEO_TYPE_VLFB) + lfb_size <<= 16; + return lfb_size; +} + +static inline unsigned int __screen_info_video_type(unsigned int type) +{ + switch (type) { + case VIDEO_TYPE_MDA: + case VIDEO_TYPE_CGA: + case VIDEO_TYPE_EGAM: + case VIDEO_TYPE_EGAC: + case VIDEO_TYPE_VGAC: + case VIDEO_TYPE_VLFB: + case VIDEO_TYPE_PICA_S3: + case VIDEO_TYPE_MIPS_G364: + case VIDEO_TYPE_SGI: + case VIDEO_TYPE_TGAC: + case VIDEO_TYPE_SUN: + case VIDEO_TYPE_SUNPCI: + case VIDEO_TYPE_PMAC: + case VIDEO_TYPE_EFI: + return type; + default: + return 0; + } +} + +/** + * screen_info_video_type() - Decodes the video type from struct screen_info + * @si: an instance of struct screen_info + * + * Returns: + * A VIDEO_TYPE_ constant representing si's type of video display, or 0 otherwise. 
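The __screen_info_lfb_base()/__screen_info_set_lfb_base() pair above hides the 32/64-bit split of the framebuffer address across lfb_base, ext_lfb_base and VIDEO_CAPABILITY_64BIT_BASE. A sketch of a relocation using them; the offset is illustrative and SZ_4K comes from <linux/sizes.h>:

	u64 base = __screen_info_lfb_base(&screen_info);

	/* The setter splits the value and keeps the 64-bit capability
	 * flag consistent, whichever half the new address needs. */
	__screen_info_set_lfb_base(&screen_info, base + SZ_4K);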
+ */ +static inline unsigned int screen_info_video_type(const struct screen_info *si) +{ + unsigned int type; + + // check if display output is on + if (!si->orig_video_isVGA) + return 0; + + // check for a known VIDEO_TYPE_ constant + type = __screen_info_video_type(si->orig_video_isVGA); + if (type) + return si->orig_video_isVGA; + + // check if text mode has been initialized + if (!si->orig_video_lines || !si->orig_video_cols) + return 0; + + // 80x25 text, mono + if (si->orig_video_mode == 0x07) { + if ((si->orig_video_ega_bx & 0xff) != 0x10) + return VIDEO_TYPE_EGAM; + else + return VIDEO_TYPE_MDA; + } + + // EGA/VGA, 16 colors + if ((si->orig_video_ega_bx & 0xff) != 0x10) { + if (si->orig_video_isVGA) + return VIDEO_TYPE_VGAC; + else + return VIDEO_TYPE_EGAC; + } + + // the rest... + return VIDEO_TYPE_CGA; +} + +ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); + +#if defined(CONFIG_PCI) +void screen_info_apply_fixups(void); +struct pci_dev *screen_info_pci_dev(const struct screen_info *si); +#else +static inline void screen_info_apply_fixups(void) +{ } +static inline struct pci_dev *screen_info_pci_dev(const struct screen_info *si) +{ + return NULL; +} +#endif + extern struct screen_info screen_info; #endif /* _SCREEN_INFO_H */ diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h index 35f3a4a8ceb1..e918f96881f5 100644 --- a/include/linux/secretmem.h +++ b/include/linux/secretmem.h @@ -6,25 +6,8 @@ extern const struct address_space_operations secretmem_aops; -static inline bool folio_is_secretmem(struct folio *folio) +static inline bool secretmem_mapping(struct address_space *mapping) { - struct address_space *mapping; - - /* - * Using folio_mapping() is quite slow because of the actual call - * instruction. - * We know that secretmem pages are not compound and LRU so we can - * save a couple of cycles here. 
- */ - if (folio_test_large(folio) || !folio_test_lru(folio)) - return false; - - mapping = (struct address_space *) - ((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS); - - if (!mapping || mapping != folio->mapping) - return false; - return mapping->a_ops == &secretmem_aops; } @@ -38,7 +21,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma) return false; } -static inline bool folio_is_secretmem(struct folio *folio) +static inline bool secretmem_mapping(struct address_space *mapping) { return false; } diff --git a/include/linux/security.h b/include/linux/security.h index d0eb20f90b26..21cf70346b33 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/bpf.h> #include <uapi/linux/lsm.h> struct linux_binprm; @@ -344,6 +345,8 @@ int security_inode_init_security_anon(struct inode *inode, const struct qstr *name, const struct inode *context_inode); int security_inode_create(struct inode *dir, struct dentry *dentry, umode_t mode); +void security_inode_post_create_tmpfile(struct mnt_idmap *idmap, + struct inode *inode); int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); int security_inode_unlink(struct inode *dir, struct dentry *dentry); @@ -361,6 +364,8 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode, int security_inode_permission(struct inode *inode, int mask); int security_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); +void security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid); int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, @@ -368,16 +373,22 @@ int security_inode_setxattr(struct mnt_idmap *idmap, int security_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl); +void security_inode_post_set_acl(struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); int security_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name); +void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name); +void security_inode_post_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry); int security_inode_getsecurity(struct mnt_idmap *idmap, @@ -387,11 +398,12 @@ int security_inode_setsecurity(struct inode *inode, const char *name, const void int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size); void security_inode_getsecid(struct inode *inode, u32 *secid); int security_inode_copy_up(struct dentry *src, struct cred **new); -int security_inode_copy_up_xattr(const char *name); +int security_inode_copy_up_xattr(struct dentry *src, const char *name); int security_kernfs_init_security(struct 
kernfs_node *kn_dir, struct kernfs_node *kn); int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); +void security_file_release(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); int security_file_ioctl_compat(struct file *file, unsigned int cmd, @@ -408,6 +420,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_file_post_open(struct file *file, int mask); int security_file_truncate(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); @@ -478,9 +491,9 @@ int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops, unsigned nsops, int alter); void security_d_instantiate(struct dentry *dentry, struct inode *inode); int security_getselfattr(unsigned int attr, struct lsm_ctx __user *ctx, - size_t __user *size, u32 flags); + u32 __user *size, u32 flags); int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, - size_t size, u32 flags); + u32 size, u32 flags); int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); int security_setprocattr(int lsmid, const char *name, void *value, size_t size); @@ -494,7 +507,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); -int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, size_t *uctx_len, +int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); #else /* CONFIG_SECURITY */ @@ -806,6 +819,10 @@ static inline int security_inode_create(struct inode *dir, return 0; } +static inline void +security_inode_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) +{ } + static inline int security_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) @@ -879,6 +896,11 @@ static inline int security_inode_setattr(struct mnt_idmap *idmap, return 0; } +static inline void +security_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + int ia_valid) +{ } + static inline int security_inode_getattr(const struct path *path) { return 0; @@ -899,6 +921,11 @@ static inline int security_inode_set_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ } + static inline int security_inode_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) @@ -913,6 +940,11 @@ static inline int security_inode_remove_acl(struct mnt_idmap *idmap, return 0; } +static inline void security_inode_post_remove_acl(struct mnt_idmap *idmap, + struct dentry *dentry, + const char *acl_name) +{ } + static inline void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { } @@ -935,6 +967,10 @@ static inline int security_inode_removexattr(struct mnt_idmap *idmap, return cap_inode_removexattr(idmap, dentry, name); } +static inline void security_inode_post_removexattr(struct dentry *dentry, + const char *name) +{ } + static inline int 
security_inode_need_killpriv(struct dentry *dentry) { return cap_inode_need_killpriv(dentry); @@ -980,7 +1016,7 @@ static inline int security_kernfs_init_security(struct kernfs_node *kn_dir, return 0; } -static inline int security_inode_copy_up_xattr(const char *name) +static inline int security_inode_copy_up_xattr(struct dentry *src, const char *name) { return -EOPNOTSUPP; } @@ -995,6 +1031,9 @@ static inline int security_file_alloc(struct file *file) return 0; } +static inline void security_file_release(struct file *file) +{ } + static inline void security_file_free(struct file *file) { } @@ -1062,6 +1101,11 @@ static inline int security_file_open(struct file *file) return 0; } +static inline int security_file_post_open(struct file *file, int mask) +{ + return 0; +} + static inline int security_file_truncate(struct file *file) { return 0; @@ -1434,7 +1478,7 @@ static inline int security_locked_down(enum lockdown_reason what) return 0; } static inline int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, - size_t *uctx_len, void *val, size_t val_len, + u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags) { return -EOPNOTSUPP; @@ -1871,6 +1915,7 @@ int security_path_mkdir(const struct path *dir, struct dentry *dentry, umode_t m int security_path_rmdir(const struct path *dir, struct dentry *dentry); int security_path_mknod(const struct path *dir, struct dentry *dentry, umode_t mode, unsigned int dev); +void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry); int security_path_truncate(const struct path *path); int security_path_symlink(const struct path *dir, struct dentry *dentry, const char *old_name); @@ -1905,6 +1950,10 @@ static inline int security_path_mknod(const struct path *dir, struct dentry *den return 0; } +static inline void security_path_post_mknod(struct mnt_idmap *idmap, + struct dentry *dentry) +{ } + static inline int security_path_truncate(const struct path *path) { return 0; @@ -1956,6 +2005,9 @@ void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, const struct cred *cred, enum key_need_perm need_perm); int security_key_getsecurity(struct key *key, char **_buffer); +void security_key_post_create_or_update(struct key *keyring, struct key *key, + const void *payload, size_t payload_len, + unsigned long flags, bool create); #else @@ -1983,6 +2035,14 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer) return 0; } +static inline void security_key_post_create_or_update(struct key *keyring, + struct key *key, + const void *payload, + size_t payload_len, + unsigned long flags, + bool create) +{ } + #endif #endif /* CONFIG_KEYS */ @@ -2064,15 +2124,22 @@ static inline void securityfs_remove(struct dentry *dentry) union bpf_attr; struct bpf_map; struct bpf_prog; -struct bpf_prog_aux; +struct bpf_token; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); -extern int security_bpf_map_alloc(struct bpf_map *map); +extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token); extern void security_bpf_map_free(struct bpf_map *map); -extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); -extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token); +extern void 
security_bpf_prog_free(struct bpf_prog *prog); +extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path); +extern void security_bpf_token_free(struct bpf_token *token); +extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd); +extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, unsigned int size) @@ -2090,7 +2157,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog) return 0; } -static inline int security_bpf_map_alloc(struct bpf_map *map) +static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, + struct bpf_token *token) { return 0; } @@ -2098,13 +2166,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map) static inline void security_bpf_map_free(struct bpf_map *map) { } -static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, + struct bpf_token *token) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog *prog) +{ } + +static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, + struct path *path) { return 0; } -static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +static inline void security_bpf_token_free(struct bpf_token *token) { } + +static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd) +{ + return 0; +} + +static inline int security_bpf_token_capable(const struct bpf_token *token, int cap) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/selection.h b/include/linux/selection.h index 170ef28ff26b..bab7d30d3446 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -14,17 +14,16 @@ struct tty_struct; struct vc_data; -extern void clear_selection(void); -extern int set_selection_user(const struct tiocl_selection __user *sel, - struct tty_struct *tty); -extern int set_selection_kernel(struct tiocl_selection *v, - struct tty_struct *tty); -extern int paste_selection(struct tty_struct *tty); -extern int sel_loadlut(char __user *p); -extern int mouse_reporting(void); -extern void mouse_report(struct tty_struct * tty, int butt, int mrx, int mry); - -bool vc_is_sel(struct vc_data *vc); +void clear_selection(void); +int set_selection_user(const struct tiocl_selection __user *sel, + struct tty_struct *tty); +int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty); +int paste_selection(struct tty_struct *tty); +int sel_loadlut(u32 __user *lut); +int mouse_reporting(void); +void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry); + +bool vc_is_sel(const struct vc_data *vc); extern int console_blanked; @@ -33,24 +32,21 @@ extern unsigned char default_red[]; extern unsigned char default_grn[]; extern unsigned char default_blu[]; -extern unsigned short *screen_pos(const struct vc_data *vc, int w_offset, - bool viewed); -extern u16 screen_glyph(const struct vc_data *vc, int offset); -extern u32 screen_glyph_unicode(const struct vc_data *vc, int offset); -extern void complement_pos(struct vc_data *vc, int offset); -extern void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); - -extern void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); -extern void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); - -extern u16 
vcs_scr_readw(const struct vc_data *vc, const u16 *org); -extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); -extern void vcs_scr_updated(struct vc_data *vc); - -extern int vc_uniscr_check(struct vc_data *vc); -extern void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, - bool viewed, - unsigned int row, unsigned int col, - unsigned int nr); +unsigned short *screen_pos(const struct vc_data *vc, int w_offset, bool viewed); +u16 screen_glyph(const struct vc_data *vc, int offset); +u32 screen_glyph_unicode(const struct vc_data *vc, int offset); +void complement_pos(struct vc_data *vc, int offset); +void invert_screen(struct vc_data *vc, int offset, int count, bool viewed); + +void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]); +void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]); + +u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org); +void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org); +void vcs_scr_updated(struct vc_data *vc); + +int vc_uniscr_check(struct vc_data *vc); +void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, bool viewed, + unsigned int row, unsigned int col, unsigned int nr); #endif diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 234bcdb1fba4..8bd4fda6e027 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); -void seq_puts(struct seq_file *m, const char *s); +void __seq_puts(struct seq_file *m, const char *s); + +static __always_inline void seq_puts(struct seq_file *m, const char *s) +{ + if (!__builtin_constant_p(*s)) + __seq_puts(m, s); + else if (s[0] && !s[1]) + seq_putc(m, s[0]); + else + seq_write(m, s, __builtin_strlen(s)); +} + void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 3fab88ba265e..ff78efc1f60d 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -27,7 +27,7 @@ struct serdev_device; * not sleep. 
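[Editor's note] The seq_puts() rework in seq_file.h above moves the dispatch into an __always_inline wrapper: for a string the compiler can see, a one-character literal collapses to seq_putc() and anything longer becomes seq_write() with the length folded by __builtin_strlen(); only non-constant strings take the out-of-line __seq_puts() path. A minimal sketch of a caller, assuming a hypothetical example_show() seq_file handler:

#include <linux/seq_file.h>

static int example_show(struct seq_file *m, void *v)
{
	seq_puts(m, "\n");	/* one constant char: folds to seq_putc(m, '\n') */
	seq_puts(m, "enabled");	/* constant string: folds to seq_write(m, s, 7) */
	return 0;
}

Both calls here resolve at compile time, so the common constant-string case no longer pays for a runtime strlen().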
*/ struct serdev_device_ops { - ssize_t (*receive_buf)(struct serdev_device *, const u8 *, size_t); + size_t (*receive_buf)(struct serdev_device *, const u8 *, size_t); void (*write_wakeup)(struct serdev_device *); }; @@ -185,9 +185,9 @@ static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl serdev->ops->write_wakeup(serdev); } -static inline ssize_t serdev_controller_receive_buf(struct serdev_controller *ctrl, - const u8 *data, - size_t count) +static inline size_t serdev_controller_receive_buf(struct serdev_controller *ctrl, + const u8 *data, + size_t count) { struct serdev_device *serdev = ctrl->serdev; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index be65de65fe61..fd59ed2cca53 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -210,6 +210,12 @@ int serial8250_console_exit(struct uart_port *port); void serial8250_set_isa_configurator(void (*v)(int port, struct uart_port *up, u32 *capabilities)); +#ifdef CONFIG_SERIAL_8250_CONSOLE +extern int hp300_setup_serial_console(void) __init; +#else +static inline int hp300_setup_serial_console(void) { return 0; } +#endif + #ifdef CONFIG_SERIAL_8250_RT288X int rt288x_setup(struct uart_port *p); int au_platform_setup(struct plat_serial8250_port *p); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 55b1f3ba48ac..8cb65f50e830 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -11,7 +11,6 @@ #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> -#include <linux/circ_buf.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> @@ -467,9 +466,10 @@ struct uart_port { unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ + unsigned char iotype; /* io access style */ - unsigned char quirks; /* internal quirks */ +#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ @@ -479,7 +479,9 @@ struct uart_port { #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ - /* quirks must be updated while holding port mutex */ + unsigned char quirks; /* internal quirks */ + + /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ @@ -696,7 +698,6 @@ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; - struct circ_buf xmit; atomic_t refcount; wait_queue_head_t remove_wait; @@ -720,12 +721,35 @@ struct uart_state { */ static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) { - struct circ_buf *xmit = &up->state->xmit; + struct tty_port *tport = &up->state->port; - xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); + kfifo_skip_count(&tport->xmit_fifo, chars); up->icount.tx += chars; } +static inline unsigned int uart_fifo_out(struct uart_port *up, + unsigned char *buf, unsigned int chars) +{ + struct tty_port *tport = &up->state->port; + + chars = kfifo_out(&tport->xmit_fifo, buf, chars); + up->icount.tx += chars; + + return chars; +} + +static inline unsigned int uart_fifo_get(struct uart_port *up, + unsigned char *ch) +{ + struct tty_port *tport = &up->state->port; + unsigned int chars; + + chars = kfifo_get(&tport->xmit_fifo, ch); 
+ up->icount.tx += chars; + + return chars; +} + struct module; struct tty_driver; @@ -761,7 +785,7 @@ enum UART_TX_FLAGS { for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ - struct circ_buf *xmit = &__port->state->xmit; \ + struct tty_port *__tport = &__port->state->port; \ unsigned int pending; \ \ for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) { \ @@ -772,21 +796,23 @@ enum UART_TX_FLAGS { continue; \ } \ \ - if (uart_circ_empty(xmit) || uart_tx_stopped(__port)) \ + if (uart_tx_stopped(__port)) \ + break; \ + \ + if (!kfifo_get(&__tport->xmit_fifo, &(ch))) \ break; \ \ - (ch) = xmit->buf[xmit->tail]; \ (put_char); \ - xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE; \ } \ \ (tx_done); \ \ - pending = uart_circ_chars_pending(xmit); \ + pending = kfifo_len(&__tport->xmit_fifo); \ if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ + __port->ops->tx_empty(__port)) \ __port->ops->stop_tx(__port); \ } \ \ @@ -959,6 +985,8 @@ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); +int uart_read_port_properties(struct uart_port *port); +int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); @@ -968,15 +996,6 @@ bool uart_match_port(const struct uart_port *port1, int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); -#define uart_circ_empty(circ) ((circ)->head == (circ)->tail) -#define uart_circ_clear(circ) ((circ)->head = (circ)->tail = 0) - -#define uart_circ_chars_pending(circ) \ - (CIRC_CNT((circ)->head, (circ)->tail, UART_XMIT_SIZE)) - -#define uart_circ_chars_free(circ) \ - (CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE)) - static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; diff --git a/include/linux/serial_max3100.h b/include/linux/serial_max3100.h deleted file mode 100644 index befd55c08a7c..000000000000 --- a/include/linux/serial_max3100.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * - * Copyright (C) 2007 Christian Pellegrin - */ - - -#ifndef _LINUX_SERIAL_MAX3100_H -#define _LINUX_SERIAL_MAX3100_H 1 - - -/** - * struct plat_max3100 - MAX3100 SPI UART platform data - * @loopback: force MAX3100 in loopback - * @crystal: 1 for 3.6864 Mhz, 0 for 1.8432 - * @max3100_hw_suspend: MAX3100 has a shutdown pin. This is a hook - * called on suspend and resume to activate it. - * @poll_time: poll time for CTS signal in ms, 0 disables (so no hw - * flow ctrl is possible but you have less CPU usage) - * - * You should use this structure in your machine description to specify - * how the MAX3100 is connected. 
Example: - * - * static struct plat_max3100 max3100_plat_data = { - * .loopback = 0, - * .crystal = 0, - * .poll_time = 100, - * }; - * - * static struct spi_board_info spi_board_info[] = { - * { - * .modalias = "max3100", - * .platform_data = &max3100_plat_data, - * .irq = IRQ_EINT12, - * .max_speed_hz = 5*1000*1000, - * .chip_select = 0, - * }, - * }; - * - **/ -struct plat_max3100 { - int loopback; - int crystal; - void (*max3100_hw_suspend) (int suspend); - int poll_time; -}; - -#endif diff --git a/include/linux/serio.h b/include/linux/serio.h index 6c27d413da92..7ca41af93b37 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -15,7 +15,7 @@ #include <linux/mod_devicetable.h> #include <uapi/linux/serio.h> -extern struct bus_type serio_bus; +extern const struct bus_type serio_bus; struct serio { void *port_data; diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 9346cd44814d..a45da7eef9a2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -554,7 +554,7 @@ bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support, unsigned long *interfaces); phy_interface_t sfp_select_interface(struct sfp_bus *bus, - unsigned long *link_modes); + const unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, @@ -592,7 +592,7 @@ static inline void sfp_parse_support(struct sfp_bus *bus, } static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, - unsigned long *link_modes) + const unsigned long *link_modes) { return PHY_INTERFACE_MODE_NA; } diff --git a/include/linux/shm.h b/include/linux/shm.h index c55bef0538e5..1d3d3ae958fb 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -16,7 +16,6 @@ struct sysv_shm { long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); -bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else @@ -30,10 +29,6 @@ static inline long do_shmat(int shmid, char __user *shmaddr, { return -ENOSYS; } -static inline bool is_file_shm_hugepages(struct file *file) -{ - return false; -} static inline void exit_shm(struct task_struct *task) { } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 2caa6b86106a..3fb18f7eb73e 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -37,7 +37,7 @@ struct shmem_inode_info { unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ #ifdef CONFIG_TMPFS_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; + struct dquot __rcu *i_dquot[MAXQUOTAS]; #endif struct inode vfs_inode; }; @@ -97,11 +97,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM -extern const struct address_space_operations shmem_aops; -static inline bool shmem_mapping(struct address_space *mapping) -{ - return mapping->a_ops == &shmem_aops; -} +bool shmem_mapping(struct address_space *mapping); #else static inline bool shmem_mapping(struct address_space *mapping) { @@ -114,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void 
shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags); +#else +static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags) +{ + return false; +} +#endif + #ifdef CONFIG_SHMEM extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); #else diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index e2d45b7cb619..926496c9cc9c 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -177,10 +177,11 @@ static inline int skb_array_peek_len_any(struct skb_array *a) return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); } -static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) +static inline int skb_array_init_noprof(struct skb_array *a, int size, gfp_t gfp) { - return ptr_ring_init(&a->ring, size, gfp); + return ptr_ring_init_noprof(&a->ring, size, gfp); } +#define skb_array_init(...) alloc_hooks(skb_array_init_noprof(__VA_ARGS__)) static void __skb_array_destroy_skb(void *ptr) { @@ -198,15 +199,17 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } -static inline int skb_array_resize_multiple(struct skb_array **rings, - int nrings, unsigned int size, - gfp_t gfp) +static inline int skb_array_resize_multiple_noprof(struct skb_array **rings, + int nrings, unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); - return ptr_ring_resize_multiple((struct ptr_ring **)rings, - nrings, size, gfp, - __skb_array_destroy_skb); + return ptr_ring_resize_multiple_noprof((struct ptr_ring **)rings, + nrings, size, gfp, + __skb_array_destroy_skb); } +#define skb_array_resize_multiple(...) \ + alloc_hooks(skb_array_resize_multiple_noprof(__VA_ARGS__)) static inline void skb_array_cleanup(struct skb_array *a) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2dde34c29203..1c2902eaebd3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #endif #include <net/net_debug.h> #include <net/dropreason-core.h> +#include <net/netmem.h> /** * DOC: skb checksums @@ -352,14 +353,16 @@ struct sk_buff; #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS -extern int sysctl_max_skb_frags; - /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to * segment using its current segmentation instead. 
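[Editor's note] The skb_array changes above follow a convention applied throughout this merge: the function that actually allocates is renamed with a _noprof suffix, and the old name becomes a variadic macro wrapped in alloc_hooks() so the allocation-profiling machinery can charge the memory to the caller's call site. A minimal sketch of the shape, using a made-up foo_alloc() wrapper (the real alloc_hooks() comes from <linux/alloc_tag.h>):

#include <linux/alloc_tag.h>
#include <linux/slab.h>

/* the _noprof variant does the real work and records no tag itself */
static inline void *foo_alloc_noprof(size_t size, gfp_t gfp)
{
	return kmalloc_noprof(size, gfp);
}

/* existing callers keep writing foo_alloc(); the hook tags their call site */
#define foo_alloc(...)	alloc_hooks(foo_alloc_noprof(__VA_ARGS__))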
*/ #define GSO_BY_FRAGS 0xFFFF -typedef struct bio_vec skb_frag_t; +typedef struct skb_frag { + netmem_ref netmem; + unsigned int len; + unsigned int offset; +} skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment @@ -367,7 +370,7 @@ typedef struct bio_vec skb_frag_t; */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->bv_len; + return frag->len; } /** @@ -377,7 +380,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->bv_len = size; + frag->len = size; } /** @@ -387,7 +390,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->bv_len += delta; + frag->len += delta; } /** @@ -397,7 +400,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->bv_len -= delta; + frag->len -= delta; } /** @@ -417,7 +420,7 @@ static inline bool skb_frag_must_loop(struct page *p) * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->bv_page + * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@ -522,6 +525,13 @@ enum { #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) +struct ubuf_info_ops { + void (*complete)(struct sk_buff *, struct ubuf_info *, + bool zerocopy_success); + /* has to be compatible with skb_zcopy_set() */ + int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg); +}; + /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. @@ -531,8 +541,7 @@ enum { * The desc field is used to track userspace buffer index. */ struct ubuf_info { - void (*callback)(struct sk_buff *, struct ubuf_info *, - bool zerocopy_success); + const struct ubuf_info_ops *ops; refcount_t refcnt; u8 flags; }; @@ -748,8 +757,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -870,10 +877,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; @@ -992,7 +996,7 @@ struct sk_buff { #ifdef CONFIG_NETFILTER_SKIP_EGRESS __u8 nf_skip_egress:1; #endif -#ifdef CONFIG_TLS_DEVICE +#ifdef CONFIG_SKB_DECRYPTED __u8 decrypted:1; #endif __u8 slow_gro:1; @@ -1174,15 +1178,6 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb) return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } -/** - * skb_rtable - Returns the skb &rtable - * @skb: buffer - */ -static inline struct rtable *skb_rtable(const struct sk_buff *skb) -{ - return (struct rtable *)skb_dst(skb); -} - /* For mangling skb->pkt_type from user space side from applications * such as nft, tc, etc, we only allow a conservative subset of * possible pkt_types to be set. 
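[Editor's note] With the typedef change above, skb_frag_t stops being an alias for struct bio_vec and carries a netmem_ref plus explicit len/offset fields; the accessors (skb_frag_size(), skb_frag_off(), skb_frag_page() and friends) absorb the change, so code that never reached into bv_page/bv_len/bv_offset compiles unmodified. A sketch of a driver RX helper that stays inside the accessor API; the offset, length and truesize values are placeholders:

#include <linux/skbuff.h>

static void example_attach_rx_page(struct sk_buff *skb, struct page *page,
				   unsigned int off, unsigned int len)
{
	/* skb_add_rx_frag() is now a wrapper: it converts the page with
	 * page_to_netmem() and calls skb_add_rx_frag_netmem() */
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, off, len,
			PAGE_SIZE);
}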
@@ -1232,6 +1227,24 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +static inline bool skb_data_unref(const struct sk_buff *skb, + struct skb_shared_info *shinfo) +{ + int bias; + + if (!skb->cloned) + return true; + + bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; + + if (atomic_read(&shinfo->dataref) == bias) + smp_rmb(); + else if (atomic_sub_return(bias, &shinfo->dataref)) + return false; + + return true; +} + void __fix_address kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); @@ -1266,7 +1279,6 @@ static inline void consume_skb(struct sk_buff *skb) void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); -extern struct kmem_cache *skbuff_cache; void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, @@ -1598,17 +1610,26 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) static inline int skb_cmp_decrypted(const struct sk_buff *skb1, const struct sk_buff *skb2) { -#ifdef CONFIG_TLS_DEVICE +#ifdef CONFIG_SKB_DECRYPTED return skb2->decrypted - skb1->decrypted; #else return 0; #endif } +static inline bool skb_is_decrypted(const struct sk_buff *skb) +{ +#ifdef CONFIG_SKB_DECRYPTED + return skb->decrypted; +#else + return false; +#endif +} + static inline void skb_copy_decrypted(struct sk_buff *to, const struct sk_buff *from) { -#ifdef CONFIG_TLS_DEVICE +#ifdef CONFIG_SKB_DECRYPTED to->decrypted = from->decrypted; #endif } @@ -1645,14 +1666,13 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) } #endif +extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; + struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); -void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, - bool success); - int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); @@ -1740,13 +1760,13 @@ static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) - uarg->callback(NULL, uarg, true); + uarg->ops->complete(NULL, uarg, true); } static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { - if (uarg->callback == msg_zerocopy_callback) + if (uarg->ops == &msg_zerocopy_ubuf_ops) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) net_zcopy_put(uarg); @@ -1760,7 +1780,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (uarg) { if (!skb_zcopy_is_nouarg(skb)) - uarg->callback(skb, uarg, zerocopy_success); + uarg->ops->complete(skb, uarg, zerocopy_success); skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; } @@ -2429,22 +2449,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, + netmem_ref netmem, int off, + int size) +{ + frag->netmem = netmem; + frag->offset = off; + skb_frag_size_set(frag, size); +} + static inline void skb_frag_fill_page_desc(skb_frag_t *frag, struct page *page, int off, int size) { - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); +} + +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, + int i, 
netmem_ref netmem, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + skb_frag_fill_netmem_desc(frag, netmem, off, size); } static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) { - skb_frag_t *frag = &shinfo->frags[i]; - - skb_frag_fill_page_desc(frag, page, off, size); + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, + size); } /** @@ -2460,10 +2495,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) } /** - * __skb_fill_page_desc - initialise a paged fragment in an skb + * __skb_fill_netmem_desc - initialise a fragment in an skb * @skb: buffer containing fragment to be initialised - * @i: paged fragment index to initialise - * @page: the page to use for this fragment + * @i: fragment index to initialise + * @netmem: the netmem to use for this fragment * @off: the offset to the data with @page * @size: the length of the data * @@ -2472,10 +2507,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) * * Does not take any additional reference on the fragment. */ -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) { - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + struct page *page = netmem_to_page(netmem); + + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); /* Propagate page pfmemalloc to the skb if we can. The problem is * that not all callers have unique ownership of the page but rely @@ -2483,7 +2520,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, */ page = compound_head(page); if (page_is_pfmemalloc(page)) - skb->pfmemalloc = true; + skb->pfmemalloc = true; +} + +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); +} + +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, + netmem_ref netmem, int off, int size) +{ + __skb_fill_netmem_desc(skb, i, netmem, off, size); + skb_shinfo(skb)->nr_frags = i + 1; } /** @@ -2503,8 +2553,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { - __skb_fill_page_desc(skb, i, page, off, size); - skb_shinfo(skb)->nr_frags = i + 1; + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); } /** @@ -2528,8 +2577,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, shinfo->nr_frags = i + 1; } -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, - int size, unsigned int truesize); +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, + int off, int size, unsigned int truesize); + +static inline void skb_add_rx_frag(struct sk_buff *skb, int i, + struct page *page, int off, int size, + unsigned int truesize) +{ + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, + truesize); +} void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize); @@ -2642,6 +2699,8 @@ static inline void skb_put_u8(struct sk_buff *skb, u8 val) void *skb_push(struct sk_buff *skb, unsigned int len); static inline void *__skb_push(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->data -= len; skb->len += 
len; return skb->data; @@ -2650,6 +2709,8 @@ static inline void *__skb_push(struct sk_buff *skb, unsigned int len) void *skb_pull(struct sk_buff *skb, unsigned int len); static inline void *__skb_pull(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb->len -= len; if (unlikely(skb->len < skb->data_len)) { #if defined(CONFIG_DEBUG_NET) @@ -2674,6 +2735,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta); static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { + DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -2846,6 +2909,11 @@ static inline void skb_set_inner_network_header(struct sk_buff *skb, skb->inner_network_header += offset; } +static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb) +{ + return skb->inner_network_header > 0; +} + static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + skb->inner_mac_header; @@ -2966,6 +3034,21 @@ static inline void skb_mac_header_rebuild(struct sk_buff *skb) } } +/* Move the full mac header up to current network_header. + * Leaves skb->data pointing at offset skb->mac_len into the mac_header. + * Must be provided the complete mac header length. + */ +static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) +{ + if (skb_mac_header_was_set(skb)) { + const unsigned char *old_mac = skb_mac_header(skb); + + skb_set_mac_header(skb, -full_mac_len); + memmove(skb_mac_header(skb), old_mac, full_mac_len); + __skb_push(skb, full_mac_len - skb->mac_len); + } +} + static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); @@ -2983,6 +3066,7 @@ static inline int skb_transport_offset(const struct sk_buff *skb) static inline u32 skb_network_header_len(const struct sk_buff *skb) { + DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } @@ -3284,13 +3368,7 @@ static inline void *napi_alloc_frag_align(unsigned int fragsz, return __napi_alloc_frag_align(fragsz, -align); } -struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, - unsigned int length, gfp_t gfp_mask); -static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, - unsigned int length) -{ - return __napi_alloc_skb(napi, length, GFP_ATOMIC); -} +struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); @@ -3305,7 +3383,7 @@ void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); * * %NULL is returned if there is no free memory. */ -static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, +static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. @@ -3318,13 +3396,11 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, */ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; - return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); + return alloc_pages_node_noprof(NUMA_NO_NODE, gfp_mask, order); } +#define __dev_alloc_pages(...) 
alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__)) -static inline struct page *dev_alloc_pages(unsigned int order) -{ - return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order); -} +#define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order) /** * __dev_alloc_page - allocate a page for network Rx @@ -3334,15 +3410,13 @@ static inline struct page *dev_alloc_pages(unsigned int order) * * %NULL is returned if there is no free memory. */ -static inline struct page *__dev_alloc_page(gfp_t gfp_mask) +static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) { - return __dev_alloc_pages(gfp_mask, 0); + return __dev_alloc_pages_noprof(gfp_mask, 0); } +#define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__)) -static inline struct page *dev_alloc_page(void) -{ - return dev_alloc_pages(0); -} +#define dev_alloc_page() dev_alloc_pages(0) /** * dev_page_is_reusable - check whether a page can be reused for network Rx @@ -3378,7 +3452,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, */ static inline unsigned int skb_frag_off(const skb_frag_t *frag) { - return frag->bv_offset; + return frag->offset; } /** @@ -3388,7 +3462,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) */ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) { - frag->bv_offset += delta; + frag->offset += delta; } /** @@ -3398,7 +3472,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) */ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) { - frag->bv_offset = offset; + frag->offset = offset; } /** @@ -3409,7 +3483,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_offset = fragfrom->bv_offset; + fragto->offset = fragfrom->offset; } /** @@ -3420,74 +3494,13 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->bv_page; -} - -/** - * __skb_frag_ref - take an addition reference on a paged fragment. - * @frag: the paged fragment - * - * Takes an additional reference on the paged fragment @frag. - */ -static inline void __skb_frag_ref(skb_frag_t *frag) -{ - get_page(skb_frag_page(frag)); -} - -/** - * skb_frag_ref - take an addition reference on a paged fragment of an skb. - * @skb: the buffer - * @f: the fragment offset. - * - * Takes an additional reference on the @f'th paged fragment of @skb. - */ -static inline void skb_frag_ref(struct sk_buff *skb, int f) -{ - __skb_frag_ref(&skb_shinfo(skb)->frags[f]); -} - -bool napi_pp_put_page(struct page *page, bool napi_safe); - -static inline void -napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe) -{ - struct page *page = skb_frag_page(frag); - -#ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page, napi_safe)) - return; -#endif - put_page(page); -} - -/** - * __skb_frag_unref - release a reference on a paged fragment. - * @frag: the paged fragment - * @recycle: recycle the page if allocated via page_pool - * - * Releases a reference on the paged fragment @frag - * or recycles the page via the page_pool API. - */ -static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) -{ - napi_frag_unref(frag, recycle, false); -} - -/** - * skb_frag_unref - release a reference on a paged fragment of an skb. 
- * @skb: the buffer - * @f: the fragment offset - * - * Releases a reference on the @f'th paged fragment of @skb. - */ -static inline void skb_frag_unref(struct sk_buff *skb, int f) -{ - struct skb_shared_info *shinfo = skb_shinfo(skb); - - if (!skb_zcopy_managed(skb)) - __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); + return netmem_to_page(frag->netmem); } +int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, + unsigned int headroom); +int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, + struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer @@ -3524,7 +3537,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void skb_frag_page_copy(skb_frag_t *fragto, const skb_frag_t *fragfrom) { - fragto->bv_page = fragfrom->bv_page; + fragto->netmem = fragfrom->netmem; } bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); @@ -3978,12 +3991,6 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len); -static inline void skb_free_datagram_locked(struct sock *sk, - struct sk_buff *skb) -{ - __skb_free_datagram_locked(sk, skb, 0); -} int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h new file mode 100644 index 000000000000..11f0a4063403 --- /dev/null +++ b/include/linux/skbuff_ref.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Skb ref helpers. + * + */ + +#ifndef _LINUX_SKBUFF_REF_H +#define _LINUX_SKBUFF_REF_H + +#include <linux/skbuff.h> + +/** + * __skb_frag_ref - take an addition reference on a paged fragment. + * @frag: the paged fragment + * + * Takes an additional reference on the paged fragment @frag. + */ +static inline void __skb_frag_ref(skb_frag_t *frag) +{ + get_page(skb_frag_page(frag)); +} + +/** + * skb_frag_ref - take an addition reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset. + * + * Takes an additional reference on the @f'th paged fragment of @skb. + */ +static inline void skb_frag_ref(struct sk_buff *skb, int f) +{ + __skb_frag_ref(&skb_shinfo(skb)->frags[f]); +} + +bool napi_pp_put_page(struct page *page); + +static inline void +skb_page_unref(struct page *page, bool recycle) +{ +#ifdef CONFIG_PAGE_POOL + if (recycle && napi_pp_put_page(page)) + return; +#endif + put_page(page); +} + +/** + * __skb_frag_unref - release a reference on a paged fragment. + * @frag: the paged fragment + * @recycle: recycle the page if allocated via page_pool + * + * Releases a reference on the paged fragment @frag + * or recycles the page via the page_pool API. + */ +static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle) +{ + skb_page_unref(skb_frag_page(frag), recycle); +} + +/** + * skb_frag_unref - release a reference on a paged fragment of an skb. + * @skb: the buffer + * @f: the fragment offset + * + * Releases a reference on the @f'th paged fragment of @skb. 
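[Editor's note] The new <linux/skbuff_ref.h> created here pulls the fragment refcounting helpers out of skbuff.h, and napi_pp_put_page() loses its napi_safe parameter along the way; users of skb_frag_ref()/skb_frag_unref() now need the dedicated include. A hedged usage sketch, assuming a hypothetical example_drop_frag() caller:

#include <linux/skbuff_ref.h>

static void example_drop_frag(struct sk_buff *skb, int f)
{
	/* recycles the page via page_pool when skb->pp_recycle is set,
	 * otherwise falls back to put_page() */
	__skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
}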
+ */ +static inline void skb_frag_unref(struct sk_buff *skb, int f) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (!skb_zcopy_managed(skb)) + __skb_frag_unref(&shinfo->frags[f], skb->pp_recycle); +} + +#endif /* _LINUX_SKBUFF_REF_H */ diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd2799..c9efda9df285 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -58,6 +58,10 @@ struct sk_psock_progs { struct bpf_prog *stream_parser; struct bpf_prog *stream_verdict; struct bpf_prog *skb_verdict; + struct bpf_link *msg_parser_link; + struct bpf_link *stream_parser_link; + struct bpf_link *stream_verdict_link; + struct bpf_link *skb_verdict_link; }; enum sk_psock_state_bits { @@ -410,11 +414,9 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg); -static inline struct sk_psock_link *sk_psock_init_link(void) -{ - return kzalloc(sizeof(struct sk_psock_link), - GFP_ATOMIC | __GFP_NOWARN); -} +#define sk_psock_init_link() \ + ((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link), \ + GFP_ATOMIC | __GFP_NOWARN)) static inline void sk_psock_free_link(struct sk_psock_link *link) { @@ -461,10 +463,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock) static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock) { + read_lock_bh(&sk->sk_callback_lock); if (psock->saved_data_ready) psock->saved_data_ready(sk); else sk->sk_data_ready(sk); + read_unlock_bh(&sk->sk_callback_lock); } static inline void psock_set_prog(struct bpf_prog **pprog, diff --git a/include/linux/slab.h b/include/linux/slab.h index b5f5ee8308d0..7247e217e21b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -21,29 +21,72 @@ #include <linux/cleanup.h> #include <linux/hash.h> +enum _slab_flag_bits { + _SLAB_CONSISTENCY_CHECKS, + _SLAB_RED_ZONE, + _SLAB_POISON, + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_CACHE_DMA, + _SLAB_CACHE_DMA32, + _SLAB_STORE_USER, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_TRACE, +#ifdef CONFIG_DEBUG_OBJECTS + _SLAB_DEBUG_OBJECTS, +#endif + _SLAB_NOLEAKTRACE, + _SLAB_NO_MERGE, +#ifdef CONFIG_FAILSLAB + _SLAB_FAILSLAB, +#endif +#ifdef CONFIG_MEMCG_KMEM + _SLAB_ACCOUNT, +#endif +#ifdef CONFIG_KASAN_GENERIC + _SLAB_KASAN, +#endif + _SLAB_NO_USER_FLAGS, +#ifdef CONFIG_KFENCE + _SLAB_SKIP_KFENCE, +#endif +#ifndef CONFIG_SLUB_TINY + _SLAB_RECLAIM_ACCOUNT, +#endif + _SLAB_OBJECT_POISON, + _SLAB_CMPXCHG_DOUBLE, +#ifdef CONFIG_SLAB_OBJ_EXT + _SLAB_NO_OBJ_EXT, +#endif + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((slab_flags_t __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((slab_flags_t __force)(0U)) /* * Flags to pass to kmem_cache_create(). 
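[Editor's note] The enum _slab_flag_bits block above replaces slab.h's hand-numbered hex masks: each flag becomes __SLAB_FLAG_BIT(bit), i.e. (slab_flags_t)(1U << bit), and a flag whose Kconfig option is off compiles to __SLAB_FLAG_UNUSED (0), so bit positions can no longer collide or drift as options come and go. Caller-visible usage is unchanged; a minimal sketch with a hypothetical example_obj cache:

#include <linux/slab.h>

struct example_obj {
	int id;
};

static struct kmem_cache *example_cachep;

static int __init example_cache_init(void)
{
	/* SLAB_ACCOUNT silently becomes 0 when CONFIG_MEMCG_KMEM is off */
	example_cachep = kmem_cache_create("example_obj",
					   sizeof(struct example_obj), 0,
					   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
					   NULL);
	return example_cachep ? 0 : -ENOMEM;
}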
* The ones marked DEBUG need CONFIG_SLUB_DEBUG enabled, otherwise are no-op */ /* DEBUG: Perform (expensive) checks on alloc/free */ -#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) +#define SLAB_CONSISTENCY_CHECKS __SLAB_FLAG_BIT(_SLAB_CONSISTENCY_CHECKS) /* DEBUG: Red zone objs in a cache */ -#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) +#define SLAB_RED_ZONE __SLAB_FLAG_BIT(_SLAB_RED_ZONE) /* DEBUG: Poison objects */ -#define SLAB_POISON ((slab_flags_t __force)0x00000800U) +#define SLAB_POISON __SLAB_FLAG_BIT(_SLAB_POISON) /* Indicate a kmalloc slab */ -#define SLAB_KMALLOC ((slab_flags_t __force)0x00001000U) +#define SLAB_KMALLOC __SLAB_FLAG_BIT(_SLAB_KMALLOC) /* Align objs on cache lines */ -#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) /* Use GFP_DMA memory */ -#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +#define SLAB_CACHE_DMA __SLAB_FLAG_BIT(_SLAB_CACHE_DMA) /* Use GFP_DMA32 memory */ -#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) +#define SLAB_CACHE_DMA32 __SLAB_FLAG_BIT(_SLAB_CACHE_DMA32) /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) +#define SLAB_STORE_USER __SLAB_FLAG_BIT(_SLAB_STORE_USER) /* Panic if kmem_cache_create() fails */ -#define SLAB_PANIC ((slab_flags_t __force)0x00040000U) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * @@ -95,21 +138,19 @@ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ -#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U) -/* Spread some memory over cpuset */ -#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) /* Trace allocations and frees */ -#define SLAB_TRACE ((slab_flags_t __force)0x00200000U) +#define SLAB_TRACE __SLAB_FLAG_BIT(_SLAB_TRACE) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS -# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U) +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_BIT(_SLAB_DEBUG_OBJECTS) #else -# define SLAB_DEBUG_OBJECTS 0 +# define SLAB_DEBUG_OBJECTS __SLAB_FLAG_UNUSED #endif /* Avoid kmemleak tracing */ -#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) +#define SLAB_NOLEAKTRACE __SLAB_FLAG_BIT(_SLAB_NOLEAKTRACE) /* * Prevent merging with compatible kmem caches. 
This flag should be used @@ -121,25 +162,25 @@ * - performance critical caches, should be very rare and consulted with slab * maintainers, and not used together with CONFIG_SLUB_TINY */ -#define SLAB_NO_MERGE ((slab_flags_t __force)0x01000000U) +#define SLAB_NO_MERGE __SLAB_FLAG_BIT(_SLAB_NO_MERGE) /* Fault injection mark */ #ifdef CONFIG_FAILSLAB -# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) +# define SLAB_FAILSLAB __SLAB_FLAG_BIT(_SLAB_FAILSLAB) #else -# define SLAB_FAILSLAB 0 +# define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /* Account to memcg */ #ifdef CONFIG_MEMCG_KMEM -# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U) +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else -# define SLAB_ACCOUNT 0 +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED #endif #ifdef CONFIG_KASAN_GENERIC -#define SLAB_KASAN ((slab_flags_t __force)0x08000000U) +#define SLAB_KASAN __SLAB_FLAG_BIT(_SLAB_KASAN) #else -#define SLAB_KASAN 0 +#define SLAB_KASAN __SLAB_FLAG_UNUSED #endif /* @@ -147,23 +188,30 @@ * Intended for caches created for self-tests so they have only flags * specified in the code and other flags are ignored. */ -#define SLAB_NO_USER_FLAGS ((slab_flags_t __force)0x10000000U) +#define SLAB_NO_USER_FLAGS __SLAB_FLAG_BIT(_SLAB_NO_USER_FLAGS) #ifdef CONFIG_KFENCE -#define SLAB_SKIP_KFENCE ((slab_flags_t __force)0x20000000U) +#define SLAB_SKIP_KFENCE __SLAB_FLAG_BIT(_SLAB_SKIP_KFENCE) #else -#define SLAB_SKIP_KFENCE 0 +#define SLAB_SKIP_KFENCE __SLAB_FLAG_UNUSED #endif /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ #ifndef CONFIG_SLUB_TINY -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_BIT(_SLAB_RECLAIM_ACCOUNT) #else -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#define SLAB_RECLAIM_ACCOUNT __SLAB_FLAG_UNUSED #endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ +/* Slab created using create_boot_cache */ +#ifdef CONFIG_SLAB_OBJ_EXT +#define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT) +#else +#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED +#endif + /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * @@ -223,12 +271,15 @@ int kmem_cache_shrink(struct kmem_cache *s); /* * Common kmalloc functions provided by all allocators */ -void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __realloc_size(2); +void * __must_check krealloc_noprof(const void *objp, size_t new_size, + gfp_t flags) __realloc_size(2); +#define krealloc(...) alloc_hooks(krealloc_noprof(__VA_ARGS__)) + void kfree(const void *objp); void kfree_sensitive(const void *objp); size_t __ksize(const void *objp); -DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) +DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T)) /** * ksize - Report actual allocation size of associated object @@ -475,7 +526,10 @@ static __always_inline unsigned int __kmalloc_index(size_t size, static_assert(PAGE_SHIFT <= 20); #define kmalloc_index(s) __kmalloc_index(s, true) -void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); +#include <linux/alloc_tag.h> + +void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); +#define __kmalloc(...) 
alloc_hooks(__kmalloc_noprof(__VA_ARGS__)) /** * kmem_cache_alloc - Allocate an object @@ -487,9 +541,14 @@ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_siz * * Return: pointer to the new object or %NULL in case of error */ -void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; -void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, - gfp_t gfpflags) __assume_slab_alignment __malloc; +void *kmem_cache_alloc_noprof(struct kmem_cache *cachep, + gfp_t flags) __assume_slab_alignment __malloc; +#define kmem_cache_alloc(...) alloc_hooks(kmem_cache_alloc_noprof(__VA_ARGS__)) + +void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_lru(...) alloc_hooks(kmem_cache_alloc_lru_noprof(__VA_ARGS__)) + void kmem_cache_free(struct kmem_cache *s, void *objp); /* @@ -500,29 +559,40 @@ void kmem_cache_free(struct kmem_cache *s, void *objp); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p); -int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p); + +int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, void **p); +#define kmem_cache_alloc_bulk(...) alloc_hooks(kmem_cache_alloc_bulk_noprof(__VA_ARGS__)) static __always_inline void kfree_bulk(size_t size, void **p) { kmem_cache_free_bulk(NULL, size, p); } -void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment +void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); -void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment - __malloc; +#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__)) + +void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, + int node) __assume_slab_alignment __malloc; +#define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) -void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) +void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); -void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_kmalloc_alignment +void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); -void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment +#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__)) + +#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__)) + +void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); +#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__)) -void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_alignment +void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment __alloc_size(1); +#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__)) /** * kmalloc - allocate kernel memory @@ -578,37 +648,39 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * Try really hard to succeed the allocation but fail * eventually. 
*/ -static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) +static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t flags) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large(size, flags); + return kmalloc_large_noprof(size, flags); index = kmalloc_index(size); - return kmalloc_trace( + return kmalloc_trace_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } - return __kmalloc(size, flags); + return __kmalloc_noprof(size, flags); } +#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) -static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large_node(size, flags, node); + return kmalloc_large_node_noprof(size, flags, node); index = kmalloc_index(size); - return kmalloc_node_trace( + return kmalloc_node_trace_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } - return __kmalloc_node(size, flags, node); + return __kmalloc_node_noprof(size, flags, node); } +#define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) /** * kmalloc_array - allocate memory for an array. @@ -616,16 +688,17 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla * @size: element size. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t size, gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc(bytes, flags); - return __kmalloc(bytes, flags); + return kmalloc_noprof(bytes, flags); + return kmalloc_noprof(bytes, flags); } +#define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) /** * krealloc_array - reallocate memory for an array. @@ -634,18 +707,19 @@ static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_ * @new_size: new size of a single member of the array * @flags: the type of memory to allocate (see kmalloc) */ -static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, - size_t new_n, - size_t new_size, - gfp_t flags) +static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(void *p, + size_t new_n, + size_t new_size, + gfp_t flags) { size_t bytes; if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) return NULL; - return krealloc(p, bytes, flags); + return krealloc_noprof(p, bytes, flags); } +#define krealloc_array(...) alloc_hooks(krealloc_array_noprof(__VA_ARGS__)) /** * kcalloc - allocate memory for an array. The memory is set to zero. @@ -653,16 +727,12 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array(void *p, * @size: element size. * @flags: the type of memory to allocate (see kmalloc). 
*/ -static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags) -{ - return kmalloc_array(n, size, flags | __GFP_ZERO); -} +#define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) -void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, +void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, unsigned long caller) __alloc_size(1); -#define kmalloc_node_track_caller(size, flags, node) \ - __kmalloc_node_track_caller(size, flags, node, \ - _RET_IP_) +#define kmalloc_node_track_caller(...) \ + alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) /* * kmalloc_track_caller is a special version of kmalloc that records the @@ -672,11 +742,12 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node, * allocator where we care about the real place the memory allocation * request comes from. */ -#define kmalloc_track_caller(size, flags) \ - __kmalloc_node_track_caller(size, flags, \ - NUMA_NO_NODE, _RET_IP_) +#define kmalloc_track_caller(...) kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE) + +#define kmalloc_track_caller_noprof(...) \ + kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_) -static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, +static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) { size_t bytes; @@ -684,77 +755,65 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc_node(bytes, flags, node); - return __kmalloc_node(bytes, flags, node); + return kmalloc_node_noprof(bytes, flags, node); + return __kmalloc_node_noprof(bytes, flags, node); } +#define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) -static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) -{ - return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); -} +#define kcalloc_node(_n, _size, _flags, _node) \ + kmalloc_array_node(_n, _size, (_flags) | __GFP_ZERO, _node) /* * Shortcuts */ -static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags) -{ - return kmem_cache_alloc(k, flags | __GFP_ZERO); -} +#define kmem_cache_zalloc(_k, _flags) kmem_cache_alloc(_k, (_flags)|__GFP_ZERO) /** * kzalloc - allocate memory. The memory is set to zero. * @size: how many bytes of memory are required. * @flags: the type of memory to allocate (see kmalloc). */ -static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags) +static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) { - return kmalloc(size, flags | __GFP_ZERO); + return kmalloc_noprof(size, flags | __GFP_ZERO); } +#define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) +#define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -/** - * kzalloc_node - allocate zeroed memory from a particular memory node. - * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate (see kmalloc). - * @node: memory node from which to allocate - */ -static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node) -{ - return kmalloc_node(size, flags | __GFP_ZERO, node); -} +extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1); +#define kvmalloc_node(...) 
alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) -extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1); -static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags) -{ - return kvmalloc_node(size, flags, NUMA_NO_NODE); -} -static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node) -{ - return kvmalloc_node(size, flags | __GFP_ZERO, node); -} -static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags) -{ - return kvmalloc(size, flags | __GFP_ZERO); -} +#define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) +#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) +#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) + +#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +static inline __alloc_size(1, 2) void * +kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - return kvmalloc(bytes, flags); + return kvmalloc_node_noprof(bytes, flags, node); } -static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags) -{ - return kvmalloc_array(n, size, flags | __GFP_ZERO); -} +#define kvmalloc_array_noprof(...) kvmalloc_array_node_noprof(__VA_ARGS__, NUMA_NO_NODE) +#define kvcalloc_node_noprof(_n,_s,_f,_node) kvmalloc_array_node_noprof(_n,_s,(_f)|__GFP_ZERO,_node) +#define kvcalloc_noprof(...) kvcalloc_node_noprof(__VA_ARGS__, NUMA_NO_NODE) + +#define kvmalloc_array(...) alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__)) +#define kvcalloc_node(...) alloc_hooks(kvcalloc_node_noprof(__VA_ARGS__)) +#define kvcalloc(...) alloc_hooks(kvcalloc_noprof(__VA_ARGS__)) -extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags) +extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); +#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__)) + extern void kvfree(const void *addr); -DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T)) +DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T)) extern void kvfree_sensitive(const void *addr, size_t len); diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h index 12c9719b2a55..3042385b7b40 100644 --- a/include/linux/slimbus.h +++ b/include/linux/slimbus.h @@ -10,7 +10,7 @@ #include <linux/completion.h> #include <linux/mod_devicetable.h> -extern struct bus_type slimbus_bus; +extern const struct bus_type slimbus_bus; /** * struct slim_eaddr - Enumeration address for a SLIMbus device diff --git a/include/linux/smp.h b/include/linux/smp.h index e87520dc2959..fcd61dfe2af3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -105,6 +105,12 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func, on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask); } +/* + * Architecture specific boot CPU setup. Defined as empty weak function in + * init/main.c. Architectures can override it. + */ +void smp_prepare_boot_cpu(void); + #ifdef CONFIG_SMP #include <linux/preempt.h> @@ -171,12 +177,6 @@ void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt -/* - * Mark the boot cpu "online" so that it can call console drivers in - * printk() and can access its per-cpu storage. 
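The kv* conversions above preserve the kmalloc-with-vmalloc-fallback semantics while funneling every variant through kvmalloc_node_noprof(). A short caller-side sketch, using a hypothetical struct sample; kvcalloc() inherits the check_mul_overflow() protection from kvmalloc_array_node_noprof() shown above:

#include <linux/mm.h>

struct sample {
        u64 key;
        u64 val;
};

static struct sample *alloc_samples(size_t n)
{
        /* Zeroed, overflow-checked n * size allocation; falls back to
         * vmalloc when the request is too large for kmalloc. */
        return kvcalloc(n, sizeof(struct sample), GFP_KERNEL);
}

static void free_samples(struct sample *s)
{
        kvfree(s); /* valid for both kmalloc- and vmalloc-backed buffers */
}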
- */ -void smp_prepare_boot_cpu(void); - extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); @@ -203,7 +203,6 @@ static inline void up_smp_call_function(smp_call_func_t func, void *info) (up_smp_call_function(func, info)) static inline void smp_send_reschedule(int cpu) { } -#define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) static inline void call_function_init(void) { } @@ -218,6 +217,8 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } static inline void wake_up_all_idle_cpus(void) { } +#define setup_max_cpus 0 + #ifdef CONFIG_UP_LATE_INIT extern void __init up_late_init(void); static inline void smp_init(void) { up_late_init(); } @@ -261,7 +262,7 @@ static inline int get_boot_cpu_id(void) * regular asm read for the stable. */ #ifndef __smp_processor_id -#define __smp_processor_id(x) raw_smp_processor_id(x) +#define __smp_processor_id() raw_smp_processor_id() #endif #ifdef CONFIG_DEBUG_PREEMPT diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h new file mode 100644 index 000000000000..edc3182d6e66 --- /dev/null +++ b/include/linux/soc/andes/irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Andes Technology Corporation + */ +#ifndef __ANDES_IRQ_H +#define __ANDES_IRQ_H + +/* Andes PMU irq number */ +#define ANDES_RV_IRQ_PMOVI 18 +#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI +#define ANDES_SLI_CAUSE_BASE 256 + +/* Andes PMU related registers */ +#define ANDES_CSR_SLIE 0x9c4 +#define ANDES_CSR_SLIP 0x9c5 +#define ANDES_CSR_SCOUNTEROF 0x9d4 + +#endif /* __ANDES_IRQ_H */ diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 649955d2cf5c..d4a8e34505e6 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -14,6 +14,15 @@ #define CMDQ_ADDR_HIGH(addr) ((u32)(((addr) >> 16) & GENMASK(31, 0))) #define CMDQ_ADDR_LOW(addr) ((u16)(addr) | BIT(1)) +/* + * Every cmdq thread has its own SPRs (Specific Purpose Registers), + * so there are 4 * N (threads) SPRs in GCE that share the same indexes below.
+ */ +#define CMDQ_THR_SPR_IDX0 (0) +#define CMDQ_THR_SPR_IDX1 (1) +#define CMDQ_THR_SPR_IDX2 (2) +#define CMDQ_THR_SPR_IDX3 (3) + struct cmdq_pkt; struct cmdq_client_reg { @@ -62,17 +71,19 @@ void cmdq_mbox_destroy(struct cmdq_client *client); /** * cmdq_pkt_create() - create a CMDQ packet + * @client: the CMDQ mailbox client + * @pkt: the CMDQ packet * @size: required CMDQ buffer size * - * Return: CMDQ packet pointer + * Return: 0 for success; else the error code is returned */ -struct cmdq_pkt *cmdq_pkt_create(struct cmdq_client *client, size_t size); +int cmdq_pkt_create(struct cmdq_client *client, struct cmdq_pkt *pkt, size_t size); /** * cmdq_pkt_destroy() - destroy the CMDQ packet + * @client: the CMDQ mailbox client * @pkt: the CMDQ packet */ -void cmdq_pkt_destroy(struct cmdq_pkt *pkt); +void cmdq_pkt_destroy(struct cmdq_client *client, struct cmdq_pkt *pkt); /** * cmdq_pkt_write() - append write command to the CMDQ packet @@ -174,6 +185,18 @@ int cmdq_pkt_write_s_mask_value(struct cmdq_pkt *pkt, u8 high_addr_reg_idx, u16 addr_low, u32 value, u32 mask); /** + * cmdq_pkt_mem_move() - append memory move command to the CMDQ packet + * @pkt: the CMDQ packet + * @src_addr: source address + * @dst_addr: destination address + * + * Appends a CMDQ command to copy the value found in `src_addr` to `dst_addr`. + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_mem_move(struct cmdq_pkt *pkt, dma_addr_t src_addr, dma_addr_t dst_addr); + +/** * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet * @pkt: the CMDQ packet * @event: the desired event type to wait @@ -184,6 +207,21 @@ int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event, bool clear); /** + * cmdq_pkt_acquire_event() - append acquire event command to the CMDQ packet + * @pkt: the CMDQ packet + * @event: the desired event to be acquired + * + * Users can treat cmdq_pkt_acquire_event() as `mutex_lock` and cmdq_pkt_clear_event() + * as `mutex_unlock` to protect the `critical section` instructions between them. + * cmdq_pkt_acquire_event() waits for the event to be cleared. + * After the event is cleared by cmdq_pkt_clear_event() in another GCE thread, + * cmdq_pkt_acquire_event() sets the event and keeps executing the next instruction. + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_acquire_event(struct cmdq_pkt *pkt, u16 event); + +/** * cmdq_pkt_clear_event() - append clear event command to the CMDQ packet * @pkt: the CMDQ packet * @event: the desired event to be cleared @@ -248,36 +286,76 @@ int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys, int cmdq_pkt_assign(struct cmdq_pkt *pkt, u16 reg_idx, u32 value); /** - * cmdq_pkt_jump() - Append jump command to the CMDQ packet, ask GCE - * to execute an instruction that change current thread PC to - * a physical address which should contains more instruction. + * cmdq_pkt_poll_addr() - Append blocking POLL command to CMDQ packet + * @pkt: the CMDQ packet + * @addr: the hardware register address + * @value: the specified target register value + * @mask: the specified target register mask + * + * Appends a polling (POLL) command to the CMDQ packet and asks the GCE + * to execute an instruction that checks for the specified `value` (with + * or without `mask`) to appear in the specified hardware register `addr`. + * All GCE threads will be blocked by this instruction.
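The acquire/clear pairing described above maps naturally onto a lock/unlock idiom. A sketch of guarding a register write shared between two GCE threads; the token value and the subsys/offset pair are placeholders that a real driver takes from its device configuration:

static int build_locked_write(struct cmdq_pkt *pkt, u16 token,
                              u8 subsys, u16 offset, u32 value)
{
        int ret;

        ret = cmdq_pkt_acquire_event(pkt, token);       /* "mutex_lock" */
        if (ret)
                return ret;

        ret = cmdq_pkt_write(pkt, subsys, offset, value); /* critical section */
        if (ret)
                return ret;

        return cmdq_pkt_clear_event(pkt, token);        /* "mutex_unlock" */
}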
+ * + * Return: 0 for success or negative error code + */ +int cmdq_pkt_poll_addr(struct cmdq_pkt *pkt, dma_addr_t addr, u32 value, u32 mask); + +/** + * cmdq_pkt_jump_abs() - Append jump command to the CMDQ packet, ask GCE + * to execute an instruction that changes the current thread + * PC to an absolute physical address which should + * contain more instructions. * @pkt: the CMDQ packet - * @addr: physical address of target instruction buffer + * @addr: absolute physical address of target instruction buffer + * @shift_pa: shift bits of physical address in CMDQ instruction. This value + * is obtained by cmdq_get_shift_pa(). * * Return: 0 for success; else the error code is returned */ -int cmdq_pkt_jump(struct cmdq_pkt *pkt, dma_addr_t addr); +int cmdq_pkt_jump_abs(struct cmdq_pkt *pkt, dma_addr_t addr, u8 shift_pa); + +/* This wrapper has to be removed after all users have migrated to jump_abs */ +static inline int cmdq_pkt_jump(struct cmdq_pkt *pkt, dma_addr_t addr, u8 shift_pa) +{ + return cmdq_pkt_jump_abs(pkt, addr, shift_pa); +} /** - * cmdq_pkt_finalize() - Append EOC and jump command to pkt. + * cmdq_pkt_jump_rel() - Append jump command to the CMDQ packet, ask GCE + * to execute an instruction that changes the current thread + * PC to a physical address with a relative offset. The + * target address should contain more instructions. * @pkt: the CMDQ packet + * @offset: relative offset of target instruction buffer from current PC. + * @shift_pa: shift bits of physical address in CMDQ instruction. This value + * is obtained by cmdq_get_shift_pa(). * * Return: 0 for success; else the error code is returned */ -int cmdq_pkt_finalize(struct cmdq_pkt *pkt); +int cmdq_pkt_jump_rel(struct cmdq_pkt *pkt, s32 offset, u8 shift_pa); + +/** + * cmdq_pkt_eoc() - Append EOC and ask GCE to generate an IRQ at end of execution + * @pkt: The CMDQ packet + * + * Appends an End Of Code (EOC) command to the CMDQ packet and asks the GCE + * to generate an interrupt at the end of the execution of all commands in + * the pipeline. + * The EOC command is usually appended to the end of the pipeline to notify + * that all commands are done. + * + * Return: 0 for success or negative error number + */ +int cmdq_pkt_eoc(struct cmdq_pkt *pkt); /** - * cmdq_pkt_flush_async() - trigger CMDQ to asynchronously execute the CMDQ - * packet and call back at the end of done packet + * cmdq_pkt_finalize() - Append EOC and jump command to pkt. * @pkt: the CMDQ packet * * Return: 0 for success; else the error code is returned - * - * Trigger CMDQ to asynchronously execute the CMDQ packet and call back - * at the end of done packet. Note that this is an ASYNC function. When the - * function returned, it may or may not be finished.
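With cmdq_pkt_eoc() and the explicit jump helpers split out, a driver can terminate a packet by hand instead of relying on cmdq_pkt_finalize(). A sketch, where shift_pa is assumed to come from cmdq_get_shift_pa() on the mailbox channel:

static int terminate_packet(struct cmdq_pkt *pkt, dma_addr_t next_buf,
                            u8 shift_pa)
{
        int ret;

        ret = cmdq_pkt_eoc(pkt);        /* raise an IRQ once execution gets here */
        if (ret)
                return ret;

        /* chain into another instruction buffer rather than stopping */
        return cmdq_pkt_jump_abs(pkt, next_buf, shift_pa);
}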
*/ -int cmdq_pkt_flush_async(struct cmdq_pkt *pkt); +int cmdq_pkt_finalize(struct cmdq_pkt *pkt); #else /* IS_ENABLED(CONFIG_MTK_CMDQ) */ @@ -294,12 +372,12 @@ static inline struct cmdq_client *cmdq_mbox_create(struct device *dev, int index static inline void cmdq_mbox_destroy(struct cmdq_client *client) { } -static inline struct cmdq_pkt *cmdq_pkt_create(struct cmdq_client *client, size_t size) +static inline int cmdq_pkt_create(struct cmdq_client *client, struct cmdq_pkt *pkt, size_t size) { - return ERR_PTR(-EINVAL); + return -EINVAL; } -static inline void cmdq_pkt_destroy(struct cmdq_pkt *pkt) { } +static inline void cmdq_pkt_destroy(struct cmdq_client *client, struct cmdq_pkt *pkt) { } static inline int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value) { @@ -374,17 +452,32 @@ static inline int cmdq_pkt_assign(struct cmdq_pkt *pkt, u16 reg_idx, u32 value) return -EINVAL; } -static inline int cmdq_pkt_jump(struct cmdq_pkt *pkt, dma_addr_t addr) +static inline int cmdq_pkt_poll_addr(struct cmdq_pkt *pkt, dma_addr_t addr, u32 value, u32 mask) { return -EINVAL; } -static inline int cmdq_pkt_finalize(struct cmdq_pkt *pkt) +static inline int cmdq_pkt_jump_abs(struct cmdq_pkt *pkt, dma_addr_t addr, u8 shift_pa) +{ + return -EINVAL; +} + +static inline int cmdq_pkt_jump(struct cmdq_pkt *pkt, dma_addr_t addr, u8 shift_pa) +{ + return -EINVAL; +} + +static inline int cmdq_pkt_jump_rel(struct cmdq_pkt *pkt, s32 offset, u8 shift_pa) { return -EINVAL; } -static inline int cmdq_pkt_flush_async(struct cmdq_pkt *pkt) +static inline int cmdq_pkt_eoc(struct cmdq_pkt *pkt) +{ + return -EINVAL; +} + +static inline int cmdq_pkt_finalize(struct cmdq_pkt *pkt) { return -EINVAL; } diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index be98aebcb3e1..7161a3183eda 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -9,7 +9,7 @@ #include <dt-bindings/soc/qcom,apr.h> #include <dt-bindings/soc/qcom,gpr.h> -extern struct bus_type aprbus; +extern const struct bus_type aprbus; #define APR_HDR_LEN(hdr_len) ((hdr_len)/4) diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h index 29e06905bc1f..0f038a1a0330 100644 --- a/include/linux/soc/qcom/geni-se.h +++ b/include/linux/soc/qcom/geni-se.h @@ -178,6 +178,7 @@ struct geni_se { #define M_GP_IRQ_3_EN BIT(12) #define M_GP_IRQ_4_EN BIT(13) #define M_GP_IRQ_5_EN BIT(14) +#define M_TX_FIFO_NOT_EMPTY_EN BIT(21) #define M_IO_DATA_DEASSERT_EN BIT(22) #define M_IO_DATA_ASSERT_EN BIT(23) #define M_RX_FIFO_RD_ERR_EN BIT(24) diff --git a/include/linux/soc/qcom/qcom-pbs.h b/include/linux/soc/qcom/qcom-pbs.h new file mode 100644 index 000000000000..8a46209ccf13 --- /dev/null +++ b/include/linux/soc/qcom/qcom-pbs.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. 
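Tying the reworked cmdq_pkt_create()/cmdq_pkt_destroy() signatures together: the packet is now a caller-owned structure rather than an allocation returned by the core. A usage sketch; PAGE_SIZE and the event number are illustrative:

static int run_simple_pkt(struct cmdq_client *cl, u16 event)
{
        struct cmdq_pkt pkt;
        int ret;

        ret = cmdq_pkt_create(cl, &pkt, PAGE_SIZE);
        if (ret)
                return ret;

        ret = cmdq_pkt_wfe(&pkt, event, true);  /* wait for (and clear) event */
        if (!ret)
                ret = cmdq_pkt_finalize(&pkt);  /* append EOC + jump */

        cmdq_pkt_destroy(cl, &pkt);
        return ret;
}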
+ */ + +#ifndef _QCOM_PBS_H +#define _QCOM_PBS_H + +#include <linux/errno.h> +#include <linux/types.h> + +struct device_node; +struct pbs_dev; + +#if IS_ENABLED(CONFIG_QCOM_PBS) +int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap); +struct pbs_dev *get_pbs_client_device(struct device *client_dev); +#else +static inline int qcom_pbs_trigger_event(struct pbs_dev *pbs, u8 bitmap) +{ + return -ENODEV; +} + +static inline struct pbs_dev *get_pbs_client_device(struct device *client_dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + +#endif diff --git a/include/linux/soc/samsung/exynos-pmu.h b/include/linux/soc/samsung/exynos-pmu.h index a4f5516cc956..2bd9d12d9a52 100644 --- a/include/linux/soc/samsung/exynos-pmu.h +++ b/include/linux/soc/samsung/exynos-pmu.h @@ -10,6 +10,7 @@ #define __LINUX_SOC_EXYNOS_PMU_H struct regmap; +struct device_node; enum sys_powerdown { SYS_AFTR, @@ -20,12 +21,20 @@ enum sys_powerdown { extern void exynos_sys_powerdown_conf(enum sys_powerdown mode); #ifdef CONFIG_EXYNOS_PMU -extern struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap(void); +struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname); #else static inline struct regmap *exynos_get_pmu_regmap(void) { return ERR_PTR(-ENODEV); } + +static inline struct regmap *exynos_get_pmu_regmap_by_phandle(struct device_node *np, + const char *propname) +{ + return ERR_PTR(-ENODEV); +} #endif #endif /* __LINUX_SOC_EXYNOS_PMU_H */ diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 0b9ecd8cf979..110978dc9af1 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -13,6 +13,7 @@ struct nlmsghdr; struct sock; struct sock_diag_handler { + struct module *owner; __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); int (*get_info)(struct sk_buff *skb, struct sock *sk); @@ -22,8 +23,13 @@ struct sock_diag_handler { int sock_diag_register(const struct sock_diag_handler *h); void sock_diag_unregister(const struct sock_diag_handler *h); -void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); -void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +struct sock_diag_inet_compat { + struct module *owner; + int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh); +}; + +void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr); +void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr); u64 __sock_gen_cookie(struct sock *sk); diff --git a/include/linux/socket.h b/include/linux/socket.h index cfcb7e2c3813..89d16b90370b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -16,6 +16,7 @@ struct cred; struct socket; struct sock; struct sk_buff; +struct proto_accept_arg; #define __sockaddr_check_size(size) \ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage))) @@ -422,13 +423,6 @@ extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, struct user_msghdr __user *umsg, struct sockaddr __user *uaddr, unsigned int flags); -extern int sendmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct iovec **iov); -extern int recvmsg_copy_msghdr(struct msghdr *msg, - struct user_msghdr __user *umsg, unsigned flags, - struct sockaddr __user **uaddr, - struct iovec **iov); extern int __copy_msghdr(struct msghdr *kmsg, struct user_msghdr *umsg, struct sockaddr __user **save_addr); @@ -440,7 +434,7 @@ extern int __sys_recvfrom(int fd, void 
__user *ubuf, size_t size, extern int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len); -extern struct file *do_accept(struct file *file, unsigned file_flags, +extern struct file *do_accept(struct file *file, struct proto_accept_arg *arg, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 307961b41541..fc5a206c4043 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, return 0; } +/* Deprecated. + * This is unsafe unless the caller has checked the user-provided optlen. + * Prefer copy_safe_from_sockptr() instead. + */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { return copy_from_sockptr_offset(dst, src, 0, size); } +/** + * copy_safe_from_sockptr: copy a struct from sockptr + * @dst: Destination address, in kernel space. This buffer must be @ksize + * bytes long. + * @ksize: Size of @dst struct. + * @optval: Source address. (in user or kernel space) + * @optlen: Size of @optval data. + * + * Returns: + * * -EINVAL: @optlen < @ksize + * * -EFAULT: access to userspace failed. + * * 0 : @ksize bytes were copied + */ +static inline int copy_safe_from_sockptr(void *dst, size_t ksize, + sockptr_t optval, unsigned int optlen) +{ + if (optlen < ksize) + return -EINVAL; + return copy_from_sockptr(dst, optval, ksize); +} + static inline int copy_struct_from_sockptr(void *dst, size_t ksize, sockptr_t src, size_t usize) { @@ -92,9 +117,9 @@ static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) return copy_to_sockptr_offset(dst, 0, src, size); } -static inline void *memdup_sockptr(sockptr_t src, size_t len) +static inline void *memdup_sockptr_noprof(sockptr_t src, size_t len) { - void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); + void *p = kmalloc_track_caller_noprof(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); @@ -104,10 +129,11 @@ static inline void *memdup_sockptr(sockptr_t src, size_t len) } return p; } +#define memdup_sockptr(...) alloc_hooks(memdup_sockptr_noprof(__VA_ARGS__)) -static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) +static inline void *memdup_sockptr_nul_noprof(sockptr_t src, size_t len) { - char *p = kmalloc_track_caller(len + 1, GFP_KERNEL); + char *p = kmalloc_track_caller_noprof(len + 1, GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -118,6 +144,7 @@ static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) p[len] = '\0'; return p; } +#define memdup_sockptr_nul(...)
alloc_hooks(memdup_sockptr_nul_noprof(__VA_ARGS__)) static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) { diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 66f814b63a43..13e96d8b7423 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -235,6 +235,7 @@ enum sdw_clk_stop_mode { * @BRA_flow_controlled: Slave implementation results in an OK_NotReady * response * @simple_ch_prep_sm: If channel prepare sequence is required + * @ch_prep_timeout: Port-specific timeout value, in milliseconds * @imp_def_interrupts: If set, each bit corresponds to support for * implementation-defined interrupts * @@ -249,6 +250,7 @@ struct sdw_dp0_prop { u32 *words; bool BRA_flow_controlled; bool simple_ch_prep_sm; + u32 ch_prep_timeout; bool imp_def_interrupts; }; @@ -543,21 +545,6 @@ enum sdw_reg_bank { }; /** - * struct sdw_bus_conf: Bus configuration - * - * @clk_freq: Clock frequency, in Hz - * @num_rows: Number of rows in frame - * @num_cols: Number of columns in frame - * @bank: Next register bank - */ -struct sdw_bus_conf { - unsigned int clk_freq; - unsigned int num_rows; - unsigned int num_cols; - unsigned int bank; -}; - -/** * struct sdw_prepare_ch: Prepare/De-prepare Data Port channel * * @num: Port number @@ -899,6 +886,7 @@ struct sdw_master_ops { * @port_ops: Master port callback ops * @params: Current bus parameters * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties * @m_rt_list: List of Master instance of all stream(s) running on Bus. This * is used to compute and program bus bandwidth, clock, frame shape, * transport and port parameters @@ -933,6 +921,7 @@ struct sdw_bus { const struct sdw_master_port_ops *port_ops; struct sdw_bus_params params; struct sdw_master_prop prop; + void *vendor_specific_prop; struct list_head m_rt_list; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index ceecad74aef9..28a4eb77717f 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -1,11 +1,12 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ /* - * Copyright (C) 2023 Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2023-24 Advanced Micro Devices, Inc. All rights reserved. 
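A sketch of the intended use of copy_safe_from_sockptr() from the sockptr.h hunk above, in a setsockopt-style handler; struct my_opt is hypothetical:

struct my_opt {
        u32 mode;
        u32 timeout_ms;
};

static int my_setsockopt(sockptr_t optval, unsigned int optlen)
{
        struct my_opt opt;
        int err;

        /* Rejects a user-supplied optlen shorter than the kernel struct,
         * which plain copy_from_sockptr() would silently accept. */
        err = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen);
        if (err)
                return err;     /* -EINVAL or -EFAULT */

        return 0;
}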
*/ #ifndef __SDW_AMD_H #define __SDW_AMD_H +#include <linux/acpi.h> #include <linux/soundwire/sdw.h> /* AMD pm_runtime quirk definitions */ @@ -25,6 +26,7 @@ #define AMD_SDW_POWER_OFF_MODE 2 #define ACP_SDW0 0 #define ACP_SDW1 1 +#define AMD_SDW_MAX_MANAGER_COUNT 2 struct acp_sdw_pdata { u16 instance; @@ -32,12 +34,6 @@ struct acp_sdw_pdata { struct mutex *acp_sdw_lock; }; -struct sdw_manager_reg_mask { - u32 sw_pad_enable_mask; - u32 sw_pad_pulldown_mask; - u32 acp_sdw_intr_mask; -}; - /** * struct sdw_amd_dai_runtime: AMD sdw dai runtime data * @@ -59,10 +55,8 @@ struct sdw_amd_dai_runtime { * @dev: linux device * @mmio: SoundWire registers mmio base * @acp_mmio: acp registers mmio base - * @reg_mask: register mask structure per manager instance * @amd_sdw_irq_thread: SoundWire manager irq workqueue * @amd_sdw_work: peripheral status work queue - * @probe_work: SoundWire manager probe workqueue * @acp_sdw_lock: mutex to protect acp share register access * @status: peripheral devices status array * @num_din_ports: number of input ports @@ -83,10 +77,8 @@ struct amd_sdw_manager { void __iomem *mmio; void __iomem *acp_mmio; - struct sdw_manager_reg_mask *reg_mask; struct work_struct amd_sdw_irq_thread; struct work_struct amd_sdw_work; - struct work_struct probe_work; /* mutex to protect acp common register access */ struct mutex *acp_sdw_lock; @@ -106,4 +98,71 @@ struct amd_sdw_manager { struct sdw_amd_dai_runtime **dai_runtime_array; }; + +/** + * struct sdw_amd_acpi_info - Soundwire AMD information found in ACPI tables + * @handle: ACPI controller handle + * @count: maximum number of SoundWire manager links supported on the AMD platform. + * @link_mask: bit-wise mask listing links enabled by BIOS menu + */ +struct sdw_amd_acpi_info { + acpi_handle handle; + int count; + u32 link_mask; +}; + +/** + * struct sdw_amd_ctx - context allocated by the controller driver probe + * + * @count: link count + * @num_slaves: total number of devices exposed across all enabled links + * @link_mask: bit-wise mask listing SoundWire links reported by the + * Controller + * @ids: array of slave_id, representing Slaves exposed across all enabled + * links + * @pdev: platform device structure + */ +struct sdw_amd_ctx { + int count; + int num_slaves; + u32 link_mask; + struct sdw_extended_slave_id *ids; + struct platform_device *pdev[AMD_SDW_MAX_MANAGER_COUNT]; +}; + +/** + * struct sdw_amd_res - Soundwire AMD global resource structure, + * typically populated by the DSP driver/Legacy driver + * + * @addr: acp pci device resource start address + * @reg_range: ACP register range + * @link_mask: bit-wise mask listing links selected by the DSP driver/ + * legacy driver + * @count: link count + * @mmio_base: mmio base of SoundWire registers + * @handle: ACPI parent handle + * @parent: parent device + * @dev: device implementing hwparams and free callbacks + * @acp_lock: mutex protecting acp common registers access + */ +struct sdw_amd_res { + u32 addr; + u32 reg_range; + u32 link_mask; + int count; + void __iomem *mmio_base; + acpi_handle handle; + struct device *parent; + struct device *dev; + /* use to protect acp common registers access */ + struct mutex *acp_lock; +}; + +int sdw_amd_probe(struct sdw_amd_res *res, struct sdw_amd_ctx **ctx); + +void sdw_amd_exit(struct sdw_amd_ctx *ctx); + +int sdw_amd_get_slave_info(struct sdw_amd_ctx *ctx); + +int amd_sdw_scan_controller(struct sdw_amd_acpi_info *info); #endif diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index
00bb22d96ae5..8e78417156e3 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -22,6 +22,7 @@ /* LCAP */ #define SDW_SHIM_LCAP 0x0 #define SDW_SHIM_LCAP_LCOUNT_MASK GENMASK(2, 0) +#define SDW_SHIM_LCAP_MLCS_MASK BIT(8) /* LCTL */ #define SDW_SHIM_LCTL 0x4 @@ -30,12 +31,18 @@ #define SDW_SHIM_LCTL_SPA_MASK GENMASK(3, 0) #define SDW_SHIM_LCTL_CPA BIT(8) #define SDW_SHIM_LCTL_CPA_MASK GENMASK(11, 8) +#define SDW_SHIM_LCTL_MLCS_MASK GENMASK(29, 27) +#define SDW_SHIM_MLCS_XTAL_CLK 0x0 +#define SDW_SHIM_MLCS_CARDINAL_CLK 0x1 +#define SDW_SHIM_MLCS_AUDIO_PLL_CLK 0x2 /* SYNC */ #define SDW_SHIM_SYNC 0xC -#define SDW_SHIM_SYNC_SYNCPRD_VAL_24 (24000 / SDW_CADENCE_GSYNC_KHZ - 1) -#define SDW_SHIM_SYNC_SYNCPRD_VAL_38_4 (38400 / SDW_CADENCE_GSYNC_KHZ - 1) +#define SDW_SHIM_SYNC_SYNCPRD_VAL_24 (24000 / SDW_CADENCE_GSYNC_KHZ - 1) +#define SDW_SHIM_SYNC_SYNCPRD_VAL_24_576 (24576 / SDW_CADENCE_GSYNC_KHZ - 1) +#define SDW_SHIM_SYNC_SYNCPRD_VAL_38_4 (38400 / SDW_CADENCE_GSYNC_KHZ - 1) +#define SDW_SHIM_SYNC_SYNCPRD_VAL_96 (96000 / SDW_CADENCE_GSYNC_KHZ - 1) #define SDW_SHIM_SYNC_SYNCPRD GENMASK(14, 0) #define SDW_SHIM_SYNC_SYNCCPU BIT(15) #define SDW_SHIM_SYNC_CMDSYNC_MASK GENMASK(19, 16) diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index 138bec908c40..658b10fa5b20 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -13,7 +13,7 @@ #define SDW_REG_NO_PAGE 0x00008000 #define SDW_REG_OPTIONAL_PAGE 0x00010000 -#define SDW_REG_MAX 0x80000000 +#define SDW_REG_MAX 0x48000000 #define SDW_DPN_SIZE 0x100 #define SDW_BANK1_OFFSET 0x10 diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h index d8c27f1e5559..693320b4f5c2 100644 --- a/include/linux/soundwire/sdw_type.h +++ b/include/linux/soundwire/sdw_type.h @@ -4,9 +4,9 @@ #ifndef __SOUNDWIRE_TYPES_H #define __SOUNDWIRE_TYPES_H -extern struct bus_type sdw_bus_type; -extern struct device_type sdw_slave_type; -extern struct device_type sdw_master_type; +extern const struct bus_type sdw_bus_type; +extern const struct device_type sdw_slave_type; +extern const struct device_type sdw_master_type; static inline int is_sdw_slave(const struct device *dev) { diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h deleted file mode 100644 index 0916cb9bcb0a..000000000000 --- a/include/linux/spi/pxa2xx_spi.h +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs - */ -#ifndef __LINUX_SPI_PXA2XX_SPI_H -#define __LINUX_SPI_PXA2XX_SPI_H - -#include <linux/types.h> - -#include <linux/pxa2xx_ssp.h> - -struct dma_chan; - -/* - * The platform data for SSP controller devices - * (resides in device.platform_data). - */ -struct pxa2xx_spi_controller { - u16 num_chipselect; - u8 enable_dma; - u8 dma_burst_size; - bool is_target; - - /* DMA engine specific config */ - bool (*dma_filter)(struct dma_chan *chan, void *param); - void *tx_param; - void *rx_param; - - /* For non-PXA arches */ - struct ssp_device ssp; -}; - -/* - * The controller specific data for SPI target devices - * (resides in spi_board_info.controller_data), - * copied to spi_device.platform_data ... mostly for - * DMA tuning. 
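For the AMD SoundWire structures above, the expected flow is that the DSP or legacy driver fills a struct sdw_amd_res and receives a struct sdw_amd_ctx back from sdw_amd_probe(). A hedged sketch; all field values are placeholders:

static struct sdw_amd_ctx *start_soundwire(struct device *parent,
                                           void __iomem *mmio,
                                           struct mutex *acp_lock,
                                           acpi_handle handle)
{
        struct sdw_amd_res res = {
                .link_mask = BIT(ACP_SDW0) | BIT(ACP_SDW1),
                .count     = AMD_SDW_MAX_MANAGER_COUNT,
                .mmio_base = mmio,
                .handle    = handle,
                .parent    = parent,
                .dev       = parent,
                .acp_lock  = acp_lock,
        };
        struct sdw_amd_ctx *ctx;

        if (sdw_amd_probe(&res, &ctx))
                return NULL;

        return ctx;
}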
- */ -struct pxa2xx_spi_chip { - u8 tx_threshold; - u8 tx_hi_threshold; - u8 rx_threshold; - u8 dma_burst_size; - u32 timeout; -}; - -#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) - -#include <linux/clk.h> - -extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info); - -#endif - -#endif /* __LINUX_SPI_PXA2XX_SPI_H */ diff --git a/include/linux/spi/rspi.h b/include/linux/spi/rspi.h deleted file mode 100644 index dbdfcc7a3db2..000000000000 --- a/include/linux/spi/rspi.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Renesas SPI driver - * - * Copyright (C) 2012 Renesas Solutions Corp. - */ - -#ifndef __LINUX_SPI_RENESAS_SPI_H__ -#define __LINUX_SPI_RENESAS_SPI_H__ - -struct rspi_plat_data { - unsigned int dma_tx_id; - unsigned int dma_rx_id; - - u16 num_chipselect; -}; - -#endif diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 600fbd5daf68..e8e1e798924f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -36,7 +36,7 @@ struct spi_message; * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, * and SPI infrastructure. */ -extern struct bus_type spi_bus_type; +extern const struct bus_type spi_bus_type; /** * struct spi_statistics - statistics for spi transfers @@ -131,7 +131,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * struct spi_device - Controller side proxy for an SPI slave device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. - * @master: Copy of controller, for backwards compatibility. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. @@ -185,7 +184,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_device { struct device dev; struct spi_controller *controller; - struct spi_controller *master; /* Compatibility layer */ u32 max_speed_hz; u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; @@ -422,8 +420,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @bus_lock_spinlock: spinlock for SPI bus locking * @bus_lock_mutex: mutex for exclusion of multiple callers * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use - * @multi_cs_cap: indicates that the SPI Controller can assert/de-assert - * more than one chip select at once. * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. @@ -452,9 +448,12 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() * @cur_msg_mapped: message has been mapped for DMA + * @fallback: fallback to PIO if DMA transfer returns failure with + * SPI_TRANS_FAIL_NO_START. + * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. * @last_cs: the last chip_select that is recorded by set_cs, -1 on non chip * selected - * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs.
+ * @last_cs_index_mask: bit mask of the last chip selects that were used + * @xfer_completion: used by core transfer_one_message() * @busy: message pump is busy * @running: message pump is running @@ -477,6 +476,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * * @set_cs: set the logic level of the chip select line. May be called * from interrupt context. + * @optimize_message: optimize the message for reuse + * @unoptimize_message: release resources allocated by optimize_message * @prepare_message: set up the controller to transfer a single message, * for example doing DMA mapping. Called from threaded * context. @@ -529,8 +530,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * If the driver does not set this, the SPI core takes the snapshot as * close to the driver hand-over as possible. * @irq_flags: Interrupt enable state during PTP system timestamping - * @fallback: fallback to PIO if DMA transfer return failure with - * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core @@ -710,13 +709,15 @@ struct spi_controller { bool rt; bool auto_runtime_pm; bool cur_msg_mapped; - char last_cs[SPI_CS_CNT_MAX]; - char last_cs_index_mask; - bool last_cs_mode_high; bool fallback; + bool last_cs_mode_high; + s8 last_cs[SPI_CS_CNT_MAX]; + u32 last_cs_index_mask : SPI_CS_CNT_MAX; struct completion xfer_completion; size_t max_dma_len; + int (*optimize_message)(struct spi_message *msg); + int (*unoptimize_message)(struct spi_message *msg); int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*transfer_one_message)(struct spi_controller *ctlr, struct spi_message *mesg); @@ -955,8 +956,8 @@ struct spi_res { * struct spi_transfer - a read/write buffer pair * @tx_buf: data to be written (DMA-safe memory), or NULL * @rx_buf: data to be read (DMA-safe memory), or NULL - * @tx_dma: DMA address of tx_buf, if @spi_message.is_dma_mapped - * @rx_dma: DMA address of rx_buf, if @spi_message.is_dma_mapped + * @tx_dma: DMA address of tx_buf, currently not for client use + * @rx_dma: DMA address of rx_buf, currently not for client use * @tx_nbits: number of bits used for writing. If 0 the default * (SPI_NBITS_SINGLE) is used. * @rx_nbits: number of bits used for reading. If 0 the default @@ -1066,8 +1067,7 @@ struct spi_transfer { /* * It's okay if tx_buf == rx_buf (right?). * For MicroWire, one buffer must be NULL. - * Buffers must work with dma_*map_single() calls, unless - * spi_message.is_dma_mapped reports a pre-existing mapping. + * Buffers must work with dma_*map_single() calls.
*/ const void *tx_buf; void *rx_buf; @@ -1111,18 +1111,19 @@ struct spi_transfer { * struct spi_message - one multi-segment SPI transaction * @transfers: list of transfer segments in this transaction * @spi: SPI device to which the transaction is queued - * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual - * addresses for each transfer buffer + * @pre_optimized: peripheral driver pre-optimized the message + * @optimized: the message is in the optimized state + * @prepared: spi_prepare_message was called for this message + * @status: zero for success, else negative errno * @complete: called to report transaction completions * @context: the argument to complete() when it's called * @frame_length: the total number of bytes in the message * @actual_length: the total number of bytes that were transferred in all * successful segments - * @status: zero for success, else negative errno * @queue: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message + * @opt_state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed - * @prepared: spi_prepare_message was called for the this message * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1143,7 +1144,10 @@ struct spi_message { struct spi_device *spi; - unsigned is_dma_mapped:1; + /* spi_optimize_message() was called for this message */ + bool pre_optimized; + /* __spi_optimize_message() was called for this message */ + bool optimized; /* spi_prepare_message() was called for this message */ bool prepared; @@ -1174,6 +1178,11 @@ struct spi_message { */ struct list_head queue; void *state; + /* + * Optional state for use by controller driver between calls to + * __spi_optimize_message() and __spi_unoptimize_message().
+ */ + void *opt_state; /* List of spi_res resources when the SPI message is processed */ struct list_head resources; @@ -1257,6 +1266,9 @@ static inline void spi_message_free(struct spi_message *m) kfree(m); } +extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); +extern void spi_unoptimize_message(struct spi_message *msg); + extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); @@ -1298,7 +1310,7 @@ spi_max_transfer_size(struct spi_device *spi) */ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) { - u32 bpw_mask = spi->master->bits_per_word_mask; + u32 bpw_mask = spi->controller->bits_per_word_mask; if (bpw == 8 || (bpw <= 32 && bpw_mask & SPI_BPW_MASK(bpw))) return true; @@ -1365,12 +1377,10 @@ struct spi_replaced_transfers { extern int spi_split_transfers_maxsize(struct spi_controller *ctlr, struct spi_message *msg, - size_t maxsize, - gfp_t gfp); + size_t maxsize); extern int spi_split_transfers_maxwords(struct spi_controller *ctlr, struct spi_message *msg, - size_t maxwords, - gfp_t gfp); + size_t maxwords); /*---------------------------------------------------------------------------*/ @@ -1670,20 +1680,4 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers); } -/* Compatibility layer */ -#define spi_master spi_controller - -#define spi_master_get_devdata(_ctlr) spi_controller_get_devdata(_ctlr) -#define spi_master_set_devdata(_ctlr, _data) \ - spi_controller_set_devdata(_ctlr, _data) -#define spi_master_get(_ctlr) spi_controller_get(_ctlr) -#define spi_master_put(_ctlr) spi_controller_put(_ctlr) -#define spi_master_suspend(_ctlr) spi_controller_suspend(_ctlr) -#define spi_master_resume(_ctlr) spi_controller_resume(_ctlr) - -#define spi_register_master(_ctlr) spi_register_controller(_ctlr) -#define devm_spi_register_master(_dev, _ctlr) \ - devm_spi_register_controller(_dev, _ctlr) -#define spi_unregister_master(_ctlr) spi_unregister_controller(_ctlr) - #endif /* __LINUX_SPI_H */ diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 4444c2a992cb..b930eca2ef7b 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -10,7 +10,7 @@ struct spi_bitbang { u8 use_dma; u16 flags; /* extra spi->mode support */ - struct spi_master *master; + struct spi_controller *ctlr; /* setup_transfer() changes clock and/or wordsize to match settings * for this transfer; zeroes restore defaults from spi_device. 
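The new optimize hooks let peripheral drivers pay the per-message validation and mapping cost once for a message that is flushed repeatedly. A sketch built on spi_optimize_message()/spi_unoptimize_message() as declared above:

static int stream_samples(struct spi_device *spi, const void *buf,
                          size_t len, int repeats)
{
        struct spi_transfer xfer = {
                .tx_buf = buf,
                .len    = len,
        };
        struct spi_message msg;
        int ret, i;

        spi_message_init_with_transfers(&msg, &xfer, 1);

        ret = spi_optimize_message(spi, &msg);  /* validate/prepare once */
        if (ret)
                return ret;

        for (i = 0; i < repeats && !ret; i++)
                ret = spi_sync(spi, &msg);      /* cheap repeated path */

        spi_unoptimize_message(&msg);
        return ret;
}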
diff --git a/include/linux/spi/spi_gpio.h b/include/linux/spi/spi_gpio.h index 9e7e83d8645b..5f0e1407917a 100644 --- a/include/linux/spi/spi_gpio.h +++ b/include/linux/spi/spi_gpio.h @@ -15,8 +15,8 @@ */ /** - * struct spi_gpio_platform_data - parameter for bitbanged SPI master - * @num_chipselect: how many slaves to allow + * struct spi_gpio_platform_data - parameter for bitbanged SPI host controller + * @num_chipselect: how many target devices to allow */ struct spi_gpio_platform_data { u16 num_chipselect; diff --git a/include/linux/spi/xilinx_spi.h b/include/linux/spi/xilinx_spi.h index 3934ce789d87..1b8d984668b6 100644 --- a/include/linux/spi/xilinx_spi.h +++ b/include/linux/spi/xilinx_spi.h @@ -2,19 +2,23 @@ #ifndef __LINUX_SPI_XILINX_SPI_H #define __LINUX_SPI_XILINX_SPI_H +#include <linux/types.h> + +struct spi_board_info; + /** * struct xspi_platform_data - Platform data of the Xilinx SPI driver - * @num_chipselect: Number of chip select by the IP. - * @little_endian: If registers should be accessed little endian or not. - * @bits_per_word: Number of bits per word. * @devices: Devices to add when the driver is probed. * @num_devices: Number of devices in the devices array. + * @num_chipselect: Number of chip select by the IP. + * @bits_per_word: Number of bits per word. + * @force_irq: If set, forces QSPI transaction requirements. */ struct xspi_platform_data { - u16 num_chipselect; - u8 bits_per_word; struct spi_board_info *devices; u8 num_devices; + u8 num_chipselect; + u8 bits_per_word; bool force_irq; }; diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 447133171d95..4d96bbdb45f0 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -64,8 +64,10 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; + preempt_disable(); // Needed for PREEMPT_AUTO idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); + preempt_enable(); return idx; } diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 1f326da289d3..a2257380c3f1 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -621,14 +621,6 @@ extern u32 ssb_dma_translation(struct ssb_device *dev); #define SSB_DMA_TRANSLATION_MASK 0xC0000000 #define SSB_DMA_TRANSLATION_SHIFT 30 -static inline void __cold __ssb_dma_not_implemented(struct ssb_device *dev) -{ -#ifdef CONFIG_SSB_DEBUG - printk(KERN_ERR "SSB: BUG! Calling DMA API for " - "unsupported bustype %d\n", dev->bus->bustype); -#endif /* DEBUG */ -} - #ifdef CONFIG_SSB_PCIHOST /* PCI-host wrapper driver */ extern int ssb_pcihost_register(struct pci_driver *driver); diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index adcbb8f23600..e9ec32fb97d4 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -30,6 +30,52 @@ typedef u32 depot_stack_handle_t; */ #define STACK_DEPOT_EXTRA_BITS 5 +#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) + +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ +#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER)) +#define DEPOT_STACK_ALIGN 4 +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ + STACK_DEPOT_EXTRA_BITS) + +#ifdef CONFIG_STACKDEPOT +/* Compact structure that stores a reference to a stack. 
*/ +union handle_parts { + depot_stack_handle_t handle; + struct { + u32 pool_index_plus_1 : DEPOT_POOL_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; + }; +}; + +struct stack_record { + struct list_head hash_list; /* Links in the hash table */ + u32 hash; /* Hash in hash table */ + u32 size; /* Number of stored frames */ + union handle_parts handle; /* Constant after initialization */ + refcount_t count; + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; +}; +#endif + typedef u32 depot_flags_t; /* @@ -132,6 +178,17 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); /** + * __stack_depot_get_stack_record - Get a pointer to a stack_record struct + * + * @handle: Stack depot handle + * + * This function is only for internal purposes. + * + * Return: Returns a pointer to a stack_record struct + */ +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle); + +/** * stack_depot_fetch - Fetch a stack trace from stack depot * * @handle: Stack depot handle returned from stack_depot_save() diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h index a9806a44a605..09f994ac87df 100644 --- a/include/linux/start_kernel.h +++ b/include/linux/start_kernel.h @@ -9,7 +9,5 @@ up something else. */ extern asmlinkage void __init __noreturn start_kernel(void); -extern void __init __noreturn arch_call_rest_init(void); -extern void __ref __noreturn rest_init(void); #endif /* _LINUX_START_KERNEL_H */ diff --git a/include/linux/stat.h b/include/linux/stat.h index 52150570d37a..bf92441dbad2 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -53,6 +53,7 @@ struct kstat { u32 dio_mem_align; u32 dio_offset_align; u64 change_cookie; + u64 subvol; }; /* These definitions are internal to the kernel for now. Mainly used by nfsd. 
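The handle encoding above (pool index, offset, extra bits) is what makes the usual save/fetch round trip cheap. A sketch of that round trip using the long-standing stack depot API:

#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t record_caller(void)
{
        unsigned long entries[16];
        unsigned int nr;

        nr = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
        return stack_depot_save(entries, nr, GFP_KERNEL);
}

static void print_recorded(depot_stack_handle_t handle)
{
        unsigned long *entries;
        unsigned int nr = stack_depot_fetch(handle, &entries);

        stack_trace_print(entries, nr, 0);
}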
*/ diff --git a/include/linux/stm.h b/include/linux/stm.h index 3b22689512be..2fcbef9608f6 100644 --- a/include/linux/stm.h +++ b/include/linux/stm.h @@ -30,6 +30,16 @@ enum stp_packet_flags { STP_PACKET_TIMESTAMPED = 0x2, }; +/** + * enum stm_source_type - STM source driver + * @STM_USER: any STM trace source + * @STM_FTRACE: ftrace STM source + */ +enum stm_source_type { + STM_USER, + STM_FTRACE, +}; + struct stp_policy; struct stm_device; @@ -106,6 +116,7 @@ struct stm_source_device; * @name: device name, will be used for policy lookup * @src: internal structure, only used by stm class code * @nr_chans: number of channels to allocate + * @type: type of STM source driver represented by stm_source_type * @link: called when this source gets linked to an STM device * @unlink: called when this source is about to get unlinked from its STM * @@ -117,6 +128,7 @@ struct stm_source_data { struct stm_source_device *src; unsigned int percpu; unsigned int nr_chans; + unsigned int type; int (*link)(struct stm_source_data *data); void (*unlink)(struct stm_source_data *data); }; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index dee5ad6e48c5..f92c195c76ed 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -115,20 +115,6 @@ struct stmmac_axi { bool axi_rb; }; -#define EST_GCL 1024 -struct stmmac_est { - struct mutex lock; - int enable; - u32 btr_reserve[2]; - u32 btr_offset[2]; - u32 btr[2]; - u32 ctr[2]; - u32 ter; - u32 gcl_unaligned[EST_GCL]; - u32 gcl[EST_GCL]; - u32 gcl_size; -}; - struct stmmac_rxq_cfg { u8 mode_to_use; u32 chan; @@ -245,7 +231,6 @@ struct plat_stmmacenet_data { struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; - struct stmmac_est *est; struct stmmac_fpe_cfg *fpe_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; @@ -284,6 +269,8 @@ struct plat_stmmacenet_data { int (*crosststamp)(ktime_t *device, struct system_counterval_t *system, void *ctx); void (*dump_debug_regs)(void *priv); + int (*pcs_init)(struct stmmac_priv *priv); + void (*pcs_exit)(struct stmmac_priv *priv); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; diff --git a/include/linux/string.h b/include/linux/string.h index ab148d8dbfc1..60168aa2af07 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -2,6 +2,7 @@ #ifndef _LINUX_STRING_H_ #define _LINUX_STRING_H_ +#include <linux/args.h> #include <linux/array_size.h> #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ @@ -13,8 +14,8 @@ #include <uapi/linux/string.h> extern char *strndup_user(const char __user *, long); -extern void *memdup_user(const void __user *, size_t); -extern void *vmemdup_user(const void __user *, size_t); +extern void *memdup_user(const void __user *, size_t) __realloc_size(2); +extern void *vmemdup_user(const void __user *, size_t) __realloc_size(2); extern void *memdup_user_nul(const void __user *, size_t); /** @@ -26,7 +27,8 @@ extern void *memdup_user_nul(const void __user *, size_t); * Return: an ERR_PTR() on failure. Result is physically * contiguous, to be freed by kfree(). */ -static inline void *memdup_array_user(const void __user *src, size_t n, size_t size) +static inline __realloc_size(2, 3) +void *memdup_array_user(const void __user *src, size_t n, size_t size) { size_t nbytes; @@ -45,7 +47,8 @@ static inline void *memdup_array_user(const void __user *src, size_t n, size_t s * Return: an ERR_PTR() on failure. Result may not be * physically contiguous. Use kvfree() to free.
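The new @type field lets the STM core distinguish source drivers by kind. A registration sketch for the stm_source_type addition above; the callbacks are illustrative no-ops:

static int my_src_link(struct stm_source_data *data)
{
        return 0;
}

static void my_src_unlink(struct stm_source_data *data)
{
}

static struct stm_source_data my_src = {
        .name     = "my_src",
        .nr_chans = 1,
        .type     = STM_USER,
        .link     = my_src_link,
        .unlink   = my_src_unlink,
};

/* registered elsewhere with stm_source_register_device(parent, &my_src) */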
*/ -static inline void *vmemdup_array_user(const void __user *src, size_t n, size_t size) +static inline __realloc_size(2, 3) +void *vmemdup_array_user(const void __user *src, size_t n, size_t size) { size_t nbytes; @@ -66,12 +69,79 @@ extern char * strcpy(char *,const char *); #ifndef __HAVE_ARCH_STRNCPY extern char * strncpy(char *,const char *, __kernel_size_t); #endif -#ifndef __HAVE_ARCH_STRSCPY -ssize_t strscpy(char *, const char *, size_t); -#endif +ssize_t sized_strscpy(char *, const char *, size_t); + +/* + * The 2 argument style can only be used when dst is an array with a + * known size. + */ +#define __strscpy0(dst, src, ...) \ + sized_strscpy(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy1(dst, src, size) sized_strscpy(dst, src, size) + +#define __strscpy_pad0(dst, src, ...) \ + sized_strscpy_pad(dst, src, sizeof(dst) + __must_be_array(dst)) +#define __strscpy_pad1(dst, src, size) sized_strscpy_pad(dst, src, size) + +/** + * strscpy - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer (optional) + * + * Copy the source string @src, or as much of it as fits, into the + * destination @dst buffer. The behavior is undefined if the string + * buffers overlap. The destination @dst buffer is always NUL terminated, + * unless it's zero-sized. + * + * The size argument @... is only required when @dst is not an array, or + * when the copy needs to be smaller than sizeof(@dst). + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zero padded. If padding is desired please use strscpy_pad(). + * + * Returns the number of characters copied in @dst (not including the + * trailing %NUL) or -E2BIG if @size is 0 or the copy from @src was + * truncated. + */ +#define strscpy(dst, src, ...) \ + CONCATENATE(__strscpy, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) + +#define sized_strscpy_pad(dest, src, count) ({ \ + char *__dst = (dest); \ + const char *__src = (src); \ + const size_t __count = (count); \ + ssize_t __wrote; \ + \ + __wrote = sized_strscpy(__dst, __src, __count); \ + if (__wrote >= 0 && __wrote < __count) \ + memset(__dst + __wrote + 1, 0, __count - __wrote - 1); \ + __wrote; \ +}) -/* Wraps calls to strscpy()/memset(), no arch specific code required */ -ssize_t strscpy_pad(char *dest, const char *src, size_t count); +/** + * strscpy_pad() - Copy a C-string into a sized buffer + * @dst: Where to copy the string to + * @src: Where to copy the string from + * @...: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. The + * behavior is undefined if the string buffers overlap. The destination + * buffer is always %NUL terminated, unless it's zero-sized. + * + * If the source string is shorter than the destination buffer, the + * remaining bytes in the buffer will be filled with %NUL bytes. + * + * For full explanation of why you may want to consider using the + * 'strscpy' functions please see the function docstring for strscpy(). + * + * Returns: + * * The number of characters copied (not including the trailing %NULs) + * * -E2BIG if count is 0 or @src was truncated. + */ +#define strscpy_pad(dst, src, ...) 
\ + CONCATENATE(__strscpy_pad, COUNT_ARGS(__VA_ARGS__))(dst, src, __VA_ARGS__) #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); @@ -214,13 +284,25 @@ extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); +extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_size(2); +#define kmemdup(...) alloc_hooks(kmemdup_noprof(__VA_ARGS__)) + extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); +extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) + __realloc_size(2, 3); +/* lib/argv_split.c */ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); +/* lib/cmdline.c */ +extern int get_option(char **str, int *pint); +extern char *get_options(const char *str, int nints, int *ints); +extern unsigned long long memparse(const char *ptr, char **retptr); +extern bool parse_option_str(const char *str, const char *option); +extern char *next_arg(char *args, char **param, char **val); + extern bool sysfs_streq(const char *s1, const char *s2); int match_string(const char * const *array, size_t n, const char *string); int __sysfs_match_string(const char * const *array, size_t n, const char *s); @@ -346,6 +428,55 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, } while (0) /** + * memtostr - Copy a possibly non-NUL-term string to a NUL-term string + * @dest: Pointer to destination NUL-terminated string + * @src: Pointer to character array (likely marked as __nonstring) + * + * This is a replacement for strncpy() uses where the source is not + * a NUL-terminated string. + * + * Note that sizes of @dest and @src must be known at compile-time. + */ +#define memtostr(dest, src) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _src_chars = strnlen(src, _src_len); \ + const size_t _copy_len = min(_dest_len - 1, _src_chars); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + !__builtin_constant_p(_src_len) || \ + _dest_len == 0 || _dest_len == (size_t)-1 || \ + _src_len == 0 || _src_len == (size_t)-1); \ + memcpy(dest, src, _copy_len); \ + dest[_copy_len] = '\0'; \ +} while (0) + +/** + * memtostr_pad - Copy a possibly non-NUL-term string to a NUL-term string + * with NUL padding in the destination + * @dest: Pointer to destination NUL-terminated string + * @src: Pointer to character array (likely marked as __nonstring) + * + * This is a replacement for strncpy() uses where the source is not + * a NUL-terminated string. + * + * Note that sizes of @dest and @src must be known at compile-time.
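+ *
+ * A usage sketch (an editor's illustration; the struct and field names
+ * are hypothetical): copy a fixed-size, possibly unterminated id field
+ * into a NUL-padded C string:
+ *
+ *	struct hw { char id[8] __nonstring; } h;
+ *	char name[16];
+ *
+ *	memtostr_pad(name, h.id);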
+ */ +#define memtostr_pad(dest, src) do { \ + const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _src_chars = strnlen(src, _src_len); \ + const size_t _copy_len = min(_dest_len - 1, _src_chars); \ + \ + BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ + !__builtin_constant_p(_src_len) || \ + _dest_len == 0 || _dest_len == (size_t)-1 || \ + _src_len == 0 || _src_len == (size_t)-1); \ + memcpy(dest, src, _copy_len); \ + memset(&dest[_copy_len], 0, _dest_len - _copy_len); \ +} while (0) + +/** * memset_after - Set a value after a struct member to the end of a struct * * @obj: Address of target struct instance diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 3c1091941eb8..d9ebe20229f8 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -42,4 +42,15 @@ static inline const char *str_yes_no(bool v) return v ? "yes" : "no"; } +/** + * str_plural - Return the simple pluralization based on English counts + * @num: Number used for deciding pluralization + * + * If @num is 1, returns empty string, otherwise returns "s". + */ +static inline const char *str_plural(size_t num) +{ + return num == 1 ? "" : "s"; +} + #endif diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 58fb1f90eda5..e93fbb5b0c01 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -17,14 +17,18 @@ static inline bool string_is_terminated(const char *s, int len) return memchr(s, '\0', len) ? true : false; } -/* Descriptions of the types of units to - * print in */ +/* Descriptions of the types of units to print in */ enum string_size_units { STRING_UNITS_10, /* use powers of 10^3 (standard SI) */ STRING_UNITS_2, /* use binary powers of 2^10 */ + STRING_UNITS_MASK = BIT(0), + + /* Modifiers */ + STRING_UNITS_NO_SPACE = BIT(30), + STRING_UNITS_NO_BYTES = BIT(31), }; -int string_get_size(u64 size, u64 blk_size, enum string_size_units units, +int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len); int parse_int_array_user(const char __user *from, size_t count, int **array); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5e9d1469c6fa..5321585c778f 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -139,6 +139,7 @@ struct rpc_create_args { const char *servername; const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2d61987b3545..0c77ba488bba 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -197,7 +197,7 @@ struct rpc_wait_queue { unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 67cf1c9efd80..23617da0e565 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -339,7 +339,6 @@ struct svc_program { const struct svc_version **pg_vers; /* version array 
*/ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ - struct svc_stat * pg_stats; /* rpc statistics */ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); __be32 (*pg_init_request)(struct svc_rqst *, const struct svc_program *, @@ -411,7 +410,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); -struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, +struct svc_serv * svc_create_pooled(struct svc_program *prog, + struct svc_stat *stats, + unsigned int bufsize, int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_info *si, struct file *file); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e7595ae62fe2..d33bab33099a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -203,6 +203,29 @@ struct svc_rdma_recv_ctxt { struct page *rc_pages[RPCSVC_MAXPAGES]; }; +/* + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions + */ +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; +}; + struct svc_rdma_send_ctxt { struct llist_node sc_node; struct rpc_rdma_cid sc_cid; @@ -210,9 +233,12 @@ struct svc_rdma_send_ctxt { struct svcxprt_rdma *sc_rdma; struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; + struct svc_rdma_write_info sc_reply_info; void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -236,18 +262,24 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); extern int svc_rdma_recvfrom(struct svc_rqst *); /* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc); extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc, enum dma_data_direction dir); -extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr); -extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, struct svc_rdma_recv_ctxt *head); @@ -258,8 +290,8 @@ extern struct svc_rdma_send_ctxt * 
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send(struct svcxprt_rdma *rdma, - struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *sctxt, const struct svc_rdma_pcl *write_pcl, diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 8e20cd60e2e7..0981e35a9fed 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -135,6 +135,9 @@ int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); +int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name, + struct net *net, struct sockaddr *sap, + int flags, const struct cred *cred); int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags, @@ -147,6 +150,8 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); +struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name, + struct net *net, const struct sockaddr *sa); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, struct net *net, const sa_family_t af, const unsigned short port); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 464f6a9492ab..81b952649d35 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -152,6 +152,7 @@ struct rpc_xprt_ops { int (*prepare_request)(struct rpc_rqst *req, struct xdr_buf *buf); int (*send_request)(struct rpc_rqst *req); + void (*abort_send_request)(struct rpc_rqst *req); void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ef503088942d..da6ebca3ff77 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -40,65 +40,6 @@ typedef int __bitwise suspend_state_t; #define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE #define PM_SUSPEND_MAX ((__force suspend_state_t) 4) -enum suspend_stat_step { - SUSPEND_FREEZE = 1, - SUSPEND_PREPARE, - SUSPEND_SUSPEND, - SUSPEND_SUSPEND_LATE, - SUSPEND_SUSPEND_NOIRQ, - SUSPEND_RESUME_NOIRQ, - SUSPEND_RESUME_EARLY, - SUSPEND_RESUME -}; - -struct suspend_stats { - int success; - int fail; - int failed_freeze; - int failed_prepare; - int failed_suspend; - int failed_suspend_late; - int failed_suspend_noirq; - int failed_resume; - int failed_resume_early; - int failed_resume_noirq; -#define REC_FAILED_NUM 2 - int last_failed_dev; - char failed_devs[REC_FAILED_NUM][40]; - int last_failed_errno; - int errno[REC_FAILED_NUM]; - int last_failed_step; - u64 last_hw_sleep; - u64 total_hw_sleep; - u64 max_hw_sleep; - enum suspend_stat_step failed_steps[REC_FAILED_NUM]; -}; - -extern struct suspend_stats suspend_stats; - -static inline void dpm_save_failed_dev(const char *name) -{ - strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev], - name, - sizeof(suspend_stats.failed_devs[0])); - suspend_stats.last_failed_dev++; - 
suspend_stats.last_failed_dev %= REC_FAILED_NUM; -} - -static inline void dpm_save_failed_errno(int err) -{ - suspend_stats.errno[suspend_stats.last_failed_errno] = err; - suspend_stats.last_failed_errno++; - suspend_stats.last_failed_errno %= REC_FAILED_NUM; -} - -static inline void dpm_save_failed_step(enum suspend_stat_step step) -{ - suspend_stats.failed_steps[suspend_stats.last_failed_step] = step; - suspend_stats.last_failed_step++; - suspend_stats.last_failed_step %= REC_FAILED_NUM; -} - /** * struct platform_suspend_ops - Callbacks for managing platform dependent * system sleep states. @@ -626,4 +567,19 @@ static inline void queue_up_suspend_work(void) {} #endif /* !CONFIG_PM_AUTOSLEEP */ +enum suspend_stat_step { + SUSPEND_WORKING = 0, + SUSPEND_FREEZE, + SUSPEND_PREPARE, + SUSPEND_SUSPEND, + SUSPEND_SUSPEND_LATE, + SUSPEND_SUSPEND_NOIRQ, + SUSPEND_RESUME_NOIRQ, + SUSPEND_RESUME_EARLY, + SUSPEND_RESUME +}; + +void dpm_save_failed_dev(const char *name); +void dpm_save_failed_step(enum suspend_stat_step step); + #endif /* _LINUX_SUSPEND_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 8d28f6091a32..bd450023b9a4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -259,7 +259,20 @@ struct swap_cluster_info { }; #define CLUSTER_FLAG_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ -#define CLUSTER_FLAG_HUGE 4 /* This cluster is backing a transparent huge page */ + +/* + * The first page in the swap file is the swap header, which is always marked + * bad to prevent it from being allocated as an entry. This also prevents the + * cluster to which it belongs being marked free. Therefore 0 is safe to use as + * a sentinel to indicate next is not valid in percpu_cluster. + */ +#define SWAP_NEXT_INVALID 0 + +#ifdef CONFIG_THP_SWAP +#define SWAP_NR_ORDERS (PMD_ORDER + 1) +#else +#define SWAP_NR_ORDERS 1 +#endif /* * We assign a cluster to each CPU, so each CPU can allocate swap entry from @@ -267,8 +280,7 @@ struct swap_cluster_info { * throughput. 
*/ struct percpu_cluster { - struct swap_cluster_info index; /* Current cluster index */ - unsigned int next; /* Likely next allocation offset */ + unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; struct swap_cluster_list { @@ -298,10 +310,8 @@ struct swap_info_struct { unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ - struct bdev_handle *bdev_handle;/* open handle of the bdev */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ - unsigned int old_block_size; /* seldom referenced */ struct completion comp; /* seldom referenced */ spinlock_t lock; /* * protect map scan related fields like @@ -350,16 +360,6 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); void workingset_activation(struct folio *folio); -/* Only track the nodes of mappings with shadow entries */ -void workingset_update_node(struct xa_node *node); -extern struct list_lru shadow_nodes; -#define mapping_set_update(xas, mapping) do { \ - if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \ - xas_set_update(xas, workingset_update_node); \ - xas_set_lru(xas, &shadow_nodes); \ - } \ -} while (0) - /* linux/mm/page_alloc.c */ extern unsigned long totalreserve_pages; @@ -447,9 +447,9 @@ static inline unsigned long total_swapcache_pages(void) return global_node_page_state(NR_SWAPCACHE); } -extern void free_swap_cache(struct page *page); -extern void free_page_and_swap_cache(struct page *); -extern void free_pages_and_swap_cache(struct encoded_page **, int); +void free_swap_cache(struct folio *folio); +void free_page_and_swap_cache(struct page *); +void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; @@ -472,14 +472,14 @@ swp_entry_t folio_alloc_swap(struct folio *folio); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); -extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size); +extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); extern void swapcache_free_entries(swp_entry_t *entries, int n); -extern int free_swap_and_cache(swp_entry_t); +extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); @@ -528,10 +528,11 @@ static inline void put_swap_device(struct swap_info_struct *si) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); -/* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */ -#define free_swap_and_cache(e) is_pfn_swap_entry(e) +static inline void free_swap_and_cache_nr(swp_entry_t entry, int nr) +{ +} -static inline void free_swap_cache(struct page *page) +static inline void free_swap_cache(struct folio *folio) { } @@ -597,14 +598,10 @@ static inline int add_swap_extent(struct swap_info_struct *sis, } #endif /* CONFIG_SWAP */ -#ifdef 
CONFIG_THP_SWAP -extern int split_swap_cluster(swp_entry_t entry); -#else -static inline int split_swap_cluster(swp_entry_t entry) +static inline void free_swap_and_cache(swp_entry_t entry) { - return 0; + free_swap_and_cache_nr(entry, 1); } -#endif #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index bff1e8d97de0..a5c560a2f8c2 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) } #endif /* CONFIG_MIGRATION */ +#ifdef CONFIG_MEMORY_FAILURE + +/* + * Support for hardware poisoned pages + */ +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + BUG_ON(!PageLocked(page)); + return swp_entry(SWP_HWPOISON, page_to_pfn(page)); +} + +static inline int is_hwpoison_entry(swp_entry_t entry) +{ + return swp_type(entry) == SWP_HWPOISON; +} + +#else + +static inline swp_entry_t make_hwpoison_entry(struct page *page) +{ + return swp_entry(0, 0); +} + +static inline int is_hwpoison_entry(swp_entry_t swp) +{ + return 0; +} +#endif + typedef unsigned long pte_marker; #define PTE_MARKER_UFFD_WP BIT(0) @@ -468,10 +497,24 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) return p; } +static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) +{ + struct folio *folio = pfn_folio(swp_offset_pfn(entry)); + + /* + * Any use of migration entries may only occur while the + * corresponding folio is locked + */ + BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio)); + + return folio; +} + /* * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They are used to represent unaddressable device memory - * and to restrict access to a page undergoing migration. + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. 
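+ *
+ * For example (an editor's sketch; pte and folio are assumed locals),
+ * code that has decoded a non-present PTE can recover the backing folio
+ * only for pfn-type entries:
+ *
+ *	swp_entry_t entry = pte_to_swp_entry(pte);
+ *
+ *	if (is_pfn_swap_entry(entry))
+ *		folio = pfn_swap_entry_folio(entry);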
*/ static inline bool is_pfn_swap_entry(swp_entry_t entry) { @@ -479,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry) BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry); + is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); } struct page_vma_mapped_walk; @@ -548,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#ifdef CONFIG_MEMORY_FAILURE - -/* - * Support for hardware poisoned pages - */ -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - BUG_ON(!PageLocked(page)); - return swp_entry(SWP_HWPOISON, page_to_pfn(page)); -} - -static inline int is_hwpoison_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_HWPOISON; -} - -#else - -static inline swp_entry_t make_hwpoison_entry(struct page *page) -{ - return swp_entry(0, 0); -} - -static inline int is_hwpoison_entry(swp_entry_t swp) -{ - return 0; -} -#endif - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ecde0312dd52..14bc10c1bb23 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -43,7 +43,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, extern void __init swiotlb_update_mem_attributes(void); phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, - size_t mapping_size, size_t alloc_size, + size_t mapping_size, unsigned int alloc_aligned_mask, enum dma_data_direction dir, unsigned long attrs); @@ -120,6 +120,8 @@ struct io_tlb_pool { * debugfs. * @used_hiwater: The high water mark for total_used. Used only for reporting * in debugfs. + * @transient_nslabs: The total number of slots in all transient pools that + * are currently used across all areas. */ struct io_tlb_mem { struct io_tlb_pool defpool; @@ -137,6 +139,7 @@ struct io_tlb_mem { #ifdef CONFIG_DEBUG_FS atomic_long_t total_used; atomic_long_t used_hiwater; + atomic_long_t transient_nslabs; #endif }; diff --git a/include/linux/sync_core.h b/include/linux/sync_core.h index 013da4b8b327..67bb9794b875 100644 --- a/include/linux/sync_core.h +++ b/include/linux/sync_core.h @@ -17,5 +17,19 @@ static inline void sync_core_before_usermode(void) } #endif -#endif /* _LINUX_SYNC_CORE_H */ +#ifdef CONFIG_ARCH_HAS_PREPARE_SYNC_CORE_CMD +#include <asm/sync_core.h> +#else +/* + * This is a dummy prepare_sync_core_cmd() implementation that can be used on + * all architectures which provide unconditional core serializing instructions + * in switch_mm(). + * If your architecture doesn't provide such core serializing instructions in + * switch_mm(), you may need to write your own functions. 
+ */ +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ +} +#endif +#endif /* _LINUX_SYNC_CORE_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 77eb9b0e7685..9104952d323d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -821,6 +821,7 @@ asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); +asmlinkage long sys_mseal(unsigned long start, size_t len, unsigned long flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, const unsigned long __user *nmask, @@ -960,10 +961,10 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, - size_t *size, __u32 flags); + u32 *size, u32 flags); asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, - size_t size, __u32 flags); -asmlinkage long sys_lsm_list_modules(u64 *ids, size_t *size, u32 flags); + u32 size, u32 flags); +asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); /* * Architecture-specific system calls diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index ee7d33b89e9e..09db2f2e6488 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -137,17 +137,6 @@ struct ctl_table { void *data; int maxlen; umode_t mode; - /** - * enum type - Enumeration to differentiate between ctl target types - * @SYSCTL_TABLE_TYPE_DEFAULT: ctl target with no special considerations - * @SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY: Used to identify a permanently - * empty directory target to serve - * as mount point. - */ - enum { - SYSCTL_TABLE_TYPE_DEFAULT, - SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY - } type; proc_handler *proc_handler; /* Callback for text formatting */ struct ctl_table_poll *poll; void *extra1; @@ -182,12 +171,23 @@ struct ctl_table_header { struct rcu_head rcu; }; struct completion *unregistering; - struct ctl_table *ctl_table_arg; + const struct ctl_table *ctl_table_arg; struct ctl_table_root *root; struct ctl_table_set *set; struct ctl_dir *parent; struct ctl_node *node; struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */ + /** + * enum type - Enumeration to differentiate between ctl target types + * @SYSCTL_TABLE_TYPE_DEFAULT: ctl target with no special considerations + * @SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY: Used to identify a permanently + * empty directory target to serve + * as mount point. 
+ */ + enum { + SYSCTL_TABLE_TYPE_DEFAULT, + SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY, + } type; }; struct ctl_dir { @@ -205,9 +205,8 @@ struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root); void (*set_ownership)(struct ctl_table_header *head, - struct ctl_table *table, kuid_t *uid, kgid_t *gid); - int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); + int (*permissions)(struct ctl_table_header *head, const struct ctl_table *table); }; #define register_sysctl(path, table) \ diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 19cb803dd5ec..c9cb657dad08 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -91,7 +91,8 @@ static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); struct platform_device *sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); + const struct simplefb_platform_data *mode, + struct device *parent); #else /* CONFIG_SYSFB_SIMPLE */ @@ -102,7 +103,8 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, } static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) + const struct simplefb_platform_data *mode, + struct device *parent) { return ERR_PTR(-EINVAL); } diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index b717a70219f6..a7d725fbf739 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -61,22 +61,32 @@ do { \ /** * struct attribute_group - data structure used to declare an attribute group. * @name: Optional: Attribute group name - * If specified, the attribute group will be created in - * a new subdirectory with this name. + * If specified, the attribute group will be created in a + * new subdirectory with this name. Additionally when a + * group is named, @is_visible and @is_bin_visible may + * return SYSFS_GROUP_INVISIBLE to control visibility of + * the directory itself. * @is_visible: Optional: Function to return permissions associated with an - * attribute of the group. Will be called repeatedly for each - * non-binary attribute in the group. Only read/write + * attribute of the group. Will be called repeatedly for + * each non-binary attribute in the group. Only read/write * permissions as well as SYSFS_PREALLOC are accepted. Must - * return 0 if an attribute is not visible. The returned value - * will replace static permissions defined in struct attribute. + * return 0 if an attribute is not visible. The returned + * value will replace static permissions defined in struct + * attribute. Use SYSFS_GROUP_VISIBLE() when assigning this + * callback to specify separate _group_visible() and + * _attr_visible() handlers. * @is_bin_visible: * Optional: Function to return permissions associated with a * binary attribute of the group. Will be called repeatedly * for each binary attribute in the group. Only read/write - * permissions as well as SYSFS_PREALLOC are accepted. Must - * return 0 if a binary attribute is not visible. The returned - * value will replace static permissions defined in - * struct bin_attribute. + * permissions as well as SYSFS_PREALLOC (and the + * visibility flags for named groups) are accepted. Must + * return 0 if a binary attribute is not visible. The + * returned value will replace static permissions defined + * in struct bin_attribute. 
If @is_visible is not set, use + * SYSFS_GROUP_VISIBLE() when assigning this callback to + * specify separate _group_visible() and _attr_visible() + * handlers. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -91,13 +101,121 @@ struct attribute_group { struct bin_attribute **bin_attrs; }; +#define SYSFS_PREALLOC 010000 +#define SYSFS_GROUP_INVISIBLE 020000 + +/* + * DEFINE_SYSFS_GROUP_VISIBLE(name): + * A helper macro to pair with the assignment of ".is_visible = + * SYSFS_GROUP_VISIBLE(name)", that arranges for the directory + * associated with a named attribute_group to optionally be hidden. + * This allows for static declaration of attribute_groups, and the + * simplification of attribute visibility lifetime that implies, + * without polluting sysfs with empty attribute directories. + * Ex. + * + * static umode_t example_attr_visible(struct kobject *kobj, + * struct attribute *attr, int n) + * { + * if (example_attr_condition) + * return 0; + * else if (ro_attr_condition) + * return 0444; + * return attr->mode; + * } + * + * static bool example_group_visible(struct kobject *kobj) + * { + * if (example_group_condition) + * return false; + * return true; + * } + * + * DEFINE_SYSFS_GROUP_VISIBLE(example); + * + * static struct attribute_group example_group = { + * .name = "example", + * .is_visible = SYSFS_GROUP_VISIBLE(example), + * .attrs = &example_attrs, + * }; + * + * Note that it expects <name>_attr_visible and <name>_group_visible to + * be defined. For cases where individual attributes do not need + * separate visibility consideration, only entire group visibility at + * once, see DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(). + */ +#define DEFINE_SYSFS_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ + } + +/* + * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name): + * A helper macro to pair with SYSFS_GROUP_VISIBLE() that like + * DEFINE_SYSFS_GROUP_VISIBLE() controls group visibility, but does + * not require the implementation of a per-attribute visibility + * callback. + * Ex. + * + * static bool example_group_visible(struct kobject *kobj) + * { + * if (example_group_condition) + * return false; + * return true; + * } + * + * DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(example); + * + * static struct attribute_group example_group = { + * .name = "example", + * .is_visible = SYSFS_GROUP_VISIBLE(example), + * .attrs = &example_attrs, + * }; + */ +#define DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ + } + +/* + * Same as DEFINE_SYSFS_GROUP_VISIBLE, but for groups with only binary + * attributes.
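+ * Ex. (an editor's sketch, assuming bin-attribute variants of the
+ * example callbacks above and a hypothetical example_bin_attrs[] array):
+ *
+ *	DEFINE_SYSFS_BIN_GROUP_VISIBLE(example);
+ *
+ *	static struct attribute_group example_group = {
+ *		.name = "example",
+ *		.is_bin_visible = SYSFS_GROUP_VISIBLE(example),
+ *		.bin_attrs = example_bin_attrs,
+ *	};
+ *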
If an attribute_group defines both text and binary + * attributes, the group visibility is determined by the function + * specified to is_visible() not is_bin_visible() + */ +#define DEFINE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct bin_attribute *attr, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return name##_attr_visible(kobj, attr, n); \ + } + +#define DEFINE_SIMPLE_SYSFS_BIN_GROUP_VISIBLE(name) \ + static inline umode_t sysfs_group_visible_##name( \ + struct kobject *kobj, struct bin_attribute *a, int n) \ + { \ + if (n == 0 && !name##_group_visible(kobj)) \ + return SYSFS_GROUP_INVISIBLE; \ + return a->mode; \ + } + +#define SYSFS_GROUP_VISIBLE(fn) sysfs_group_visible_##fn + /* * Use these macros to make defining attributes easier. * See include/linux/device.h for examples.. */ -#define SYSFS_PREALLOC 010000 - #define __ATTR(_name, _mode, _show, _store) { \ .attr = {.name = __stringify(_name), \ .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ @@ -253,6 +371,17 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RO(_name, _size) #define BIN_ATTR_ADMIN_RW(_name, _size) \ struct bin_attribute bin_attr_##_name = __BIN_ATTR_ADMIN_RW(_name, _size) +#define __BIN_ATTR_SIMPLE_RO(_name, _mode) { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .read = sysfs_bin_attr_simple_read, \ +} + +#define BIN_ATTR_SIMPLE_RO(_name) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0444) + +#define BIN_ATTR_SIMPLE_ADMIN_RO(_name) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_SIMPLE_RO(_name, 0400) + struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *, char *); ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); @@ -360,6 +489,10 @@ int sysfs_emit(char *buf, const char *fmt, ...); __printf(3, 4) int sysfs_emit_at(char *buf, int at, const char *fmt, ...); +ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) diff --git a/include/linux/tc.h b/include/linux/tc.h index a60639f37963..1638660abf5e 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -120,7 +120,7 @@ static inline unsigned long tc_get_speed(struct tc_bus *tbus) #ifdef CONFIG_TC -extern struct bus_type tc_bus_type; +extern const struct bus_type tc_bus_type; extern int tc_register_driver(struct tc_driver *tdrv); extern void tc_unregister_driver(struct tc_driver *tdrv); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a1c47a6d69b0..6a5e08b937b3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -244,6 +244,7 @@ struct tcp_sock { /* OOO segments go in this rbtree. Socket lock must be held. 
*/ struct rb_root out_of_order_queue; u32 snd_ssthresh; /* Slow start size threshold */ + u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); /* TX read-write hotpath cache lines */ @@ -264,10 +265,8 @@ struct tcp_sock { u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lsndtime; u32 mdev_us; /* medium deviation */ - u64 tcp_wstamp_ns; /* departure time for next sent data packet */ - u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ - u64 tcp_mstamp; /* most recent packet received/sent */ u32 rtt_seq; /* sequence number to update rttvar */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ struct sk_buff *highest_sack; /* skb just after the highest * skb with SACKed bit set @@ -284,6 +283,8 @@ struct tcp_sock { * 0x5?10 << 16 + snd_wnd in net byte order */ __be32 pred_flags; + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ + u64 tcp_mstamp; /* most recent packet received/sent */ u32 rcv_nxt; /* What we want to receive next */ u32 snd_nxt; /* Next sequence we send */ u32 snd_una; /* First byte we want an ack for */ @@ -304,7 +305,7 @@ struct tcp_sock { __cacheline_group_end(tcp_sock_write_txrx); /* RX read-write hotpath cache lines */ - __cacheline_group_begin(tcp_sock_write_rx); + __cacheline_group_begin(tcp_sock_write_rx) __aligned(8); u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes @@ -350,7 +351,6 @@ struct tcp_sock { u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups * total number of DSACK blocks received */ - u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 compressed_ack_rcv_nxt; struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ @@ -371,7 +371,6 @@ struct tcp_sock { tlp_retrans:1, /* TLP is a retransmission */ unused:5; u8 thin_lto : 1,/* Use linear timeouts for thin streams */ - recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ fastopen_client_fail:2, /* reason why fastopen failed */ @@ -384,12 +383,12 @@ struct tcp_sock { syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ /* RTT measurement */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u8 keepalive_probes; /* num of allowed keep alive probes */ u32 reord_seen; /* number of data packet reordering events */ /* @@ -402,6 +401,7 @@ struct tcp_sock { u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. 
*/ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ struct hrtimer pacing_timer; struct hrtimer compressed_ack_timer; @@ -477,8 +477,8 @@ struct tcp_sock { bool is_mptcp; #endif #if IS_ENABLED(CONFIG_SMC) - bool (*smc_hs_congested)(const struct sock *sk); bool syn_smc; /* SYN includes SMC */ + bool (*smc_hs_congested)(const struct sock *sk); #endif #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) diff --git a/include/linux/tee_core.h b/include/linux/tee_core.h new file mode 100644 index 000000000000..efd16ed52315 --- /dev/null +++ b/include/linux/tee_core.h @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024 Linaro Limited + */ + +#ifndef __TEE_CORE_H +#define __TEE_CORE_H + +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/idr.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/tee.h> +#include <linux/tee_drv.h> +#include <linux/types.h> +#include <linux/uuid.h> + +/* + * The file describes the API provided by the generic TEE driver to the + * specific TEE driver. + */ + +#define TEE_SHM_DYNAMIC BIT(0) /* Dynamic shared memory registered */ + /* in secure world */ +#define TEE_SHM_USER_MAPPED BIT(1) /* Memory mapped in user space */ +#define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ +#define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ + +#define TEE_DEVICE_FLAG_REGISTERED 0x1 +#define TEE_MAX_DEV_NAME_LEN 32 + +/** + * struct tee_device - TEE Device representation + * @name: name of device + * @desc: description of device + * @id: unique id of device + * @flags: represented by TEE_DEVICE_FLAG_REGISTERED above + * @dev: embedded basic device structure + * @cdev: embedded cdev + * @num_users: number of active users of this device + * @c_no_users: completion used when unregistering the device + * @mutex: mutex protecting @num_users and @idr + * @idr: register of user space shared memory objects allocated or + * registered on this device + * @pool: shared memory pool + */ +struct tee_device { + char name[TEE_MAX_DEV_NAME_LEN]; + const struct tee_desc *desc; + int id; + unsigned int flags; + + struct device dev; + struct cdev cdev; + + size_t num_users; + struct completion c_no_users; + struct mutex mutex; /* protects num_users and idr */ + + struct idr idr; + struct tee_shm_pool *pool; +}; + +/** + * struct tee_driver_ops - driver operations vtable + * @get_version: returns version of driver + * @open: called when the device file is opened + * @release: release this open file + * @open_session: open a new session + * @close_session: close a session + * @system_session: declare session as a system session + * @invoke_func: invoke a trusted function + * @cancel_req: request cancel of an ongoing invoke or open + * @supp_recv: called for supplicant to get a command + * @supp_send: called for supplicant to send a response + * @shm_register: register shared memory buffer in TEE + * @shm_unregister: unregister shared memory buffer in TEE + */ +struct tee_driver_ops { + void (*get_version)(struct tee_device *teedev, + struct tee_ioctl_version_data *vers); + int (*open)(struct tee_context *ctx); + void (*release)(struct tee_context *ctx); + int (*open_session)(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + int (*close_session)(struct tee_context *ctx, u32 session); + int (*system_session)(struct tee_context *ctx, u32 session); + int (*invoke_func)(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param
*param); + int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); + int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); + int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); + int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, + struct page **pages, size_t num_pages, + unsigned long start); + int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm); +}; + +/** + * struct tee_desc - Describes the TEE driver to the subsystem + * @name: name of driver + * @ops: driver operations vtable + * @owner: module providing the driver + * @flags: Extra properties of driver, defined by TEE_DESC_* below + */ +#define TEE_DESC_PRIVILEGED 0x1 +struct tee_desc { + const char *name; + const struct tee_driver_ops *ops; + struct module *owner; + u32 flags; +}; + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data); + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() needs to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev); + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. + */ +void tee_device_unregister(struct tee_device *teedev); + +/** + * tee_session_calc_client_uuid() - Calculates client UUID for session + * @uuid: Resulting UUID + * @connection_method: Connection method for session (TEE_IOCTL_LOGIN_*) + * @connection_data: Connection data for opening session + * + * Based on connection method calculates UUIDv5 based client UUID. + * + * For group based logins verifies that calling process has specified + * credentials.
+ * + * @return < 0 on failure + */ +int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, + const u8 connection_data[TEE_IOCTL_UUID_LEN]); + +/** + * struct tee_shm_pool - shared memory pool + * @ops: operations + * @private_data: private data for the shared memory manager + */ +struct tee_shm_pool { + const struct tee_shm_pool_ops *ops; + void *private_data; +}; + +/** + * struct tee_shm_pool_ops - shared memory pool operations + * @alloc: called when allocating shared memory + * @free: called when freeing shared memory + * @destroy_pool: called when destroying the pool + */ +struct tee_shm_pool_ops { + int (*alloc)(struct tee_shm_pool *pool, struct tee_shm *shm, + size_t size, size_t align); + void (*free)(struct tee_shm_pool *pool, struct tee_shm *shm); + void (*destroy_pool)(struct tee_shm_pool *pool); +}; + +/* + * tee_shm_pool_alloc_res_mem() - Create a shm manager for reserved memory + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool *tee_shm_pool_alloc_res_mem(unsigned long vaddr, + phys_addr_t paddr, size_t size, + int min_alloc_order); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called. + */ +static inline void tee_shm_pool_free(struct tee_shm_pool *pool) +{ + pool->ops->destroy_pool(pool); +} + +/** + * tee_get_drvdata() - Return driver_data pointer + * @returns the driver_data pointer supplied to tee_device_alloc(). + */ +void *tee_get_drvdata(struct tee_device *teedev); + +/** + * tee_shm_alloc_priv_buf() - Allocate shared memory for private use by specific + * TEE driver + * @ctx: The TEE context for shared memory allocation + * @size: Shared memory allocation size + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); + +int tee_dyn_shm_alloc_helper(struct tee_shm *shm, size_t size, size_t align, + int (*shm_register)(struct tee_context *ctx, + struct tee_shm *shm, + struct page **pages, + size_t num_pages, + unsigned long start)); +void tee_dyn_shm_free_helper(struct tee_shm *shm, + int (*shm_unregister)(struct tee_context *ctx, + struct tee_shm *shm)); + +/** + * tee_shm_is_dynamic() - Check if shared memory object is of the dynamic kind + * @shm: Shared memory handle + * @returns true if object is dynamic shared memory + */ +static inline bool tee_shm_is_dynamic(struct tee_shm *shm) +{ + return shm && (shm->flags & TEE_SHM_DYNAMIC); +} + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +static inline int tee_shm_get_id(struct tee_shm *shm) +{ + return shm->id; +} + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); + +static inline bool tee_param_is_memref(struct tee_param *param) +{ + switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
+ case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: + case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: + return true; + default: + return false; + } +} + +/** + * teedev_open() - Open a struct tee_device + * @teedev: Device to open + * + * @return a pointer to struct tee_context on success or an ERR_PTR on failure. + */ +struct tee_context *teedev_open(struct tee_device *teedev); + +/** + * teedev_close_context() - closes a struct tee_context + * @ctx: The struct tee_context to close + */ +void teedev_close_context(struct tee_context *ctx); + +#endif /*__TEE_CORE_H*/ diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 911ddf92dcee..786b9ae6cf4d 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -1,40 +1,28 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015-2022 Linaro Limited + * Copyright (c) 2015-2024 Linaro Limited */ #ifndef __TEE_DRV_H #define __TEE_DRV_H #include <linux/device.h> -#include <linux/idr.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/mod_devicetable.h> #include <linux/tee.h> #include <linux/types.h> -#include <linux/uuid.h> /* - * The file describes the API provided by the generic TEE driver to the - * specific TEE driver. + * The file describes the API provided by the TEE subsystem to the + * TEE client drivers. */ -#define TEE_SHM_DYNAMIC BIT(0) /* Dynamic shared memory registered */ - /* in secure world */ -#define TEE_SHM_USER_MAPPED BIT(1) /* Memory mapped in user space */ -#define TEE_SHM_POOL BIT(2) /* Memory allocated from pool */ -#define TEE_SHM_PRIV BIT(3) /* Memory private to TEE driver */ - -struct device; struct tee_device; -struct tee_shm; -struct tee_shm_pool; /** * struct tee_context - driver specific context on file pointer data * @teedev: pointer to this drivers struct tee_device - * @list_shm: List of shared memory object owned by this context * @data: driver specific context data, managed by the driver * @refcount: reference counter for this structure * @releasing: flag that indicates if context is being released right now. 
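 *
 * Client drivers normally obtain a tee_context through the client API
 * below rather than creating one directly. An editor's sketch (the
 * match callback and the OP-TEE implementation id are illustrative
 * only):
 *
 *	static int ctx_match(struct tee_ioctl_version_data *ver,
 *			     const void *data)
 *	{
 *		return ver->impl_id == TEE_IMPL_ID_OPTEE;
 *	}
 *
 *	ctx = tee_client_open_context(NULL, ctx_match, NULL, NULL);
 *	if (IS_ERR(ctx))
 *		return PTR_ERR(ctx);
 *	...
 *	tee_client_close_context(ctx);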
@@ -57,134 +45,6 @@ struct tee_context { bool cap_memref_null; }; -struct tee_param_memref { - size_t shm_offs; - size_t size; - struct tee_shm *shm; -}; - -struct tee_param_value { - u64 a; - u64 b; - u64 c; -}; - -struct tee_param { - u64 attr; - union { - struct tee_param_memref memref; - struct tee_param_value value; - } u; -}; - -/** - * struct tee_driver_ops - driver operations vtable - * @get_version: returns version of driver - * @open: called when the device file is opened - * @release: release this open file - * @open_session: open a new session - * @close_session: close a session - * @system_session: declare session as a system session - * @invoke_func: invoke a trusted function - * @cancel_req: request cancel of an ongoing invoke or open - * @supp_recv: called for supplicant to get a command - * @supp_send: called for supplicant to send a response - * @shm_register: register shared memory buffer in TEE - * @shm_unregister: unregister shared memory buffer in TEE - */ -struct tee_driver_ops { - void (*get_version)(struct tee_device *teedev, - struct tee_ioctl_version_data *vers); - int (*open)(struct tee_context *ctx); - void (*release)(struct tee_context *ctx); - int (*open_session)(struct tee_context *ctx, - struct tee_ioctl_open_session_arg *arg, - struct tee_param *param); - int (*close_session)(struct tee_context *ctx, u32 session); - int (*system_session)(struct tee_context *ctx, u32 session); - int (*invoke_func)(struct tee_context *ctx, - struct tee_ioctl_invoke_arg *arg, - struct tee_param *param); - int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); - int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, - struct tee_param *param); - int (*supp_send)(struct tee_context *ctx, u32 ret, u32 num_params, - struct tee_param *param); - int (*shm_register)(struct tee_context *ctx, struct tee_shm *shm, - struct page **pages, size_t num_pages, - unsigned long start); - int (*shm_unregister)(struct tee_context *ctx, struct tee_shm *shm); -}; - -/** - * struct tee_desc - Describes the TEE driver to the subsystem - * @name: name of driver - * @ops: driver operations vtable - * @owner: module providing the driver - * @flags: Extra properties of driver, defined by TEE_DESC_* below - */ -#define TEE_DESC_PRIVILEGED 0x1 -struct tee_desc { - const char *name; - const struct tee_driver_ops *ops; - struct module *owner; - u32 flags; -}; - -/** - * tee_device_alloc() - Allocate a new struct tee_device instance - * @teedesc: Descriptor for this driver - * @dev: Parent device for this device - * @pool: Shared memory pool, NULL if not used - * @driver_data: Private driver data for this device - * - * Allocates a new struct tee_device instance. The device is - * removed by tee_device_unregister(). - * - * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure - */ -struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, - struct device *dev, - struct tee_shm_pool *pool, - void *driver_data); - -/** - * tee_device_register() - Registers a TEE device - * @teedev: Device to register - * - * tee_device_unregister() need to be called to remove the @teedev if - * this function fails. - * - * @returns < 0 on failure - */ -int tee_device_register(struct tee_device *teedev); - -/** - * tee_device_unregister() - Removes a TEE device - * @teedev: Device to unregister - * - * This function should be called to remove the @teedev even if - * tee_device_register() hasn't been called yet. Does nothing if - * @teedev is NULL. 
- */ -void tee_device_unregister(struct tee_device *teedev); - -/** - * tee_session_calc_client_uuid() - Calculates client UUID for session - * @uuid: Resulting UUID - * @connection_method: Connection method for session (TEE_IOCTL_LOGIN_*) - * @connectuon_data: Connection data for opening session - * - * Based on connection method calculates UUIDv5 based client UUID. - * - * For group based logins verifies that calling process has specified - * credentials. - * - * @return < 0 on failure - */ -int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, - const u8 connection_data[TEE_IOCTL_UUID_LEN]); - /** * struct tee_shm - shared memory object * @ctx: context using the object @@ -195,15 +55,12 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, * @pages: locked pages from userspace * @num_pages: number of locked pages * @refcount: reference counter - * @flags: defined by TEE_SHM_* in tee_drv.h + * @flags: defined by TEE_SHM_* in tee_core.h * @id: unique id of a shared memory object on this device, shared * with user space * @sec_world_id: * secure world assigned id of this shared memory object, not * used by all drivers - * - * This pool is only supposed to be accessed directly from the TEE - * subsystem and from drivers that implements their own shm pool manager. */ struct tee_shm { struct tee_context *ctx; @@ -219,88 +76,53 @@ struct tee_shm { u64 sec_world_id; }; -/** - * struct tee_shm_pool - shared memory pool - * @ops: operations - * @private_data: private data for the shared memory manager - */ -struct tee_shm_pool { - const struct tee_shm_pool_ops *ops; - void *private_data; +struct tee_param_memref { + size_t shm_offs; + size_t size; + struct tee_shm *shm; }; -/** - * struct tee_shm_pool_ops - shared memory pool operations - * @alloc: called when allocating shared memory - * @free: called when freeing shared memory - * @destroy_pool: called when destroying the pool - */ -struct tee_shm_pool_ops { - int (*alloc)(struct tee_shm_pool *pool, struct tee_shm *shm, - size_t size, size_t align); - void (*free)(struct tee_shm_pool *pool, struct tee_shm *shm); - void (*destroy_pool)(struct tee_shm_pool *pool); +struct tee_param_value { + u64 a; + u64 b; + u64 c; }; -/* - * tee_shm_pool_alloc_res_mem() - Create a shm manager for reserved memory - * @vaddr: Virtual address of start of pool - * @paddr: Physical address of start of pool - * @size: Size in bytes of the pool - * - * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. - */ -struct tee_shm_pool *tee_shm_pool_alloc_res_mem(unsigned long vaddr, - phys_addr_t paddr, size_t size, - int min_alloc_order); +struct tee_param { + u64 attr; + union { + struct tee_param_memref memref; + struct tee_param_value value; + } u; +}; /** - * tee_shm_pool_free() - Free a shared memory pool - * @pool: The shared memory pool to free - * - * The must be no remaining shared memory allocated from this pool when - * this function is called. 
+ * tee_shm_alloc_kernel_buf() - Allocate kernel shared memory for a + * particular TEE client driver + * @ctx: The TEE context for shared memory allocation + * @size: Shared memory allocation size + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure */ -static inline void tee_shm_pool_free(struct tee_shm_pool *pool) -{ - pool->ops->destroy_pool(pool); -} +struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); /** - * tee_get_drvdata() - Return driver_data pointer - * @returns the driver_data pointer supplied to tee_register(). + * tee_shm_register_kernel_buf() - Register kernel shared memory for a + * particular TEE client driver + * @ctx: The TEE context for shared memory registration + * @addr: Kernel buffer address + * @length: Kernel buffer length + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure */ -void *tee_get_drvdata(struct tee_device *teedev); - -struct tee_shm *tee_shm_alloc_priv_buf(struct tee_context *ctx, size_t size); -struct tee_shm *tee_shm_alloc_kernel_buf(struct tee_context *ctx, size_t size); - struct tee_shm *tee_shm_register_kernel_buf(struct tee_context *ctx, void *addr, size_t length); /** - * tee_shm_is_dynamic() - Check if shared memory object is of the dynamic kind - * @shm: Shared memory handle - * @returns true if object is dynamic shared memory - */ -static inline bool tee_shm_is_dynamic(struct tee_shm *shm) -{ - return shm && (shm->flags & TEE_SHM_DYNAMIC); -} - -/** * tee_shm_free() - Free shared memory * @shm: Handle to shared memory to free */ void tee_shm_free(struct tee_shm *shm); /** - * tee_shm_put() - Decrease reference count on a shared memory handle - * @shm: Shared memory handle - */ -void tee_shm_put(struct tee_shm *shm); - -/** * tee_shm_get_va() - Get virtual address of a shared memory plus an offset * @shm: Shared memory handle * @offs: Offset from start of this shared memory @@ -353,25 +175,6 @@ static inline size_t tee_shm_get_page_offset(struct tee_shm *shm) } /** - * tee_shm_get_id() - Get id of a shared memory object - * @shm: Shared memory handle - * @returns id - */ -static inline int tee_shm_get_id(struct tee_shm *shm) -{ - return shm->id; -} - -/** - * tee_shm_get_from_id() - Find shared memory object and increase reference - * count - * @ctx: Context owning the shared memory - * @id: Id of shared memory object - * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure - */ -struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); - -/** * tee_client_open_context() - Open a TEE context * @start: if not NULL, continue search after this context * @match: function to check TEE device @@ -470,19 +273,7 @@ int tee_client_invoke_func(struct tee_context *ctx, int tee_client_cancel_req(struct tee_context *ctx, struct tee_ioctl_cancel_arg *arg); -static inline bool tee_param_is_memref(struct tee_param *param) -{ - switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) { - case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INPUT: - case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_OUTPUT: - case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT: - return true; - default: - return false; - } -} - -extern struct bus_type tee_bus_type; +extern const struct bus_type tee_bus_type; /** * struct tee_client_device - tee based device @@ -509,18 +300,4 @@ struct tee_client_driver { #define to_tee_client_driver(d) \ container_of(d, struct tee_client_driver, driver) -/** - * teedev_open() - Open a struct tee_device - * @teedev: Device to open - * - * @return a pointer to struct 
tee_context on success or an ERR_PTR on failure. - */ -struct tee_context *teedev_open(struct tee_device *teedev); - -/** - * teedev_close_context() - closes a struct tee_context - * @ctx: The struct tee_context to close - */ -void teedev_close_context(struct tee_context *ctx); - #endif /*__TEE_DRV_H*/ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index b7a3deb372fd..f1155c0439c4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -61,18 +61,26 @@ enum thermal_notify_event { * struct thermal_trip - representation of a point in temperature domain * @temperature: temperature value in miliCelsius * @hysteresis: relative hysteresis in miliCelsius - * @threshold: trip crossing notification threshold miliCelsius * @type: trip point type * @priv: pointer to driver data associated with this trip + * @flags: flags representing binary properties of the trip */ struct thermal_trip { int temperature; int hysteresis; - int threshold; enum thermal_trip_type type; + u8 flags; void *priv; }; +#define THERMAL_TRIP_FLAG_RW_TEMP BIT(0) +#define THERMAL_TRIP_FLAG_RW_HYST BIT(1) + +#define THERMAL_TRIP_FLAG_RW (THERMAL_TRIP_FLAG_RW_TEMP | \ + THERMAL_TRIP_FLAG_RW_HYST) + +struct thermal_zone_device; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); @@ -83,7 +91,6 @@ struct thermal_zone_device_ops { int (*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*set_trip_temp) (struct thermal_zone_device *, int, int); - int (*set_trip_hyst) (struct thermal_zone_device *, int, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, @@ -119,114 +126,9 @@ struct thermal_cooling_device { #endif }; -/** - * struct thermal_zone_device - structure for a thermal zone - * @id: unique id number for each thermal zone - * @type: the thermal zone device type - * @device: &struct device for this thermal zone - * @removal: removal completion - * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature - * @trip_type_attrs: attributes for trip points for sysfs: trip type - * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis - * @mode: current mode of this thermal zone - * @devdata: private pointer for device private data - * @trips: an array of struct thermal_trip - * @num_trips: number of trip points the thermal zone supports - * @passive_delay_jiffies: number of jiffies to wait between polls when - * performing passive cooling. - * @polling_delay_jiffies: number of jiffies to wait between polls when - * checking whether trip points have been crossed (0 for - * interrupt driven systems) - * @temperature: current temperature. This is only for core code, - * drivers should use thermal_zone_get_temp() to get the - * current temperature - * @last_temperature: previous temperature read - * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION - * @passive: 1 if you've crossed a passive trip point, 0 otherwise. - * @prev_low_trip: the low current temperature if you've crossed a passive - trip point. - * @prev_high_trip: the above current temperature if you've crossed a - passive trip point. - * @need_update: if equals 1, thermal_zone_device_update needs to be invoked. 
- * @ops: operations this &thermal_zone_device supports - * @tzp: thermal zone parameters - * @governor: pointer to the governor for this thermal zone - * @governor_data: private pointer for governor data - * @thermal_instances: list of &struct thermal_instance of this thermal zone - * @ida: &struct ida to generate unique id for this zone's cooling - * devices - * @lock: lock to protect thermal_instances list - * @node: node in thermal_tz_list (in thermal_core.c) - * @poll_queue: delayed work for polling - * @notify_event: Last notification event - * @suspended: thermal zone suspend indicator - */ -struct thermal_zone_device { - int id; - char type[THERMAL_NAME_LENGTH]; - struct device device; - struct completion removal; - struct attribute_group trips_attribute_group; - struct thermal_attr *trip_temp_attrs; - struct thermal_attr *trip_type_attrs; - struct thermal_attr *trip_hyst_attrs; - enum thermal_device_mode mode; - void *devdata; - struct thermal_trip *trips; - int num_trips; - unsigned long passive_delay_jiffies; - unsigned long polling_delay_jiffies; - int temperature; - int last_temperature; - int emul_temperature; - int passive; - int prev_low_trip; - int prev_high_trip; - atomic_t need_update; - struct thermal_zone_device_ops *ops; - struct thermal_zone_params *tzp; - struct thermal_governor *governor; - void *governor_data; - struct list_head thermal_instances; - struct ida ida; - struct mutex lock; - struct list_head node; - struct delayed_work poll_queue; - enum thermal_notify_event notify_event; -#ifdef CONFIG_THERMAL_DEBUGFS - struct thermal_debugfs *debugfs; -#endif - bool suspended; -}; - -/** - * struct thermal_governor - structure that holds thermal governor information - * @name: name of the governor - * @bind_to_tz: callback called when binding to a thermal zone. If it - * returns 0, the governor is bound to the thermal zone, - * otherwise it fails. - * @unbind_from_tz: callback called when a governor is unbound from a - * thermal zone. - * @throttle: callback called for every trip point even if temperature is - * below the trip point temperature - * @update_tz: callback called when thermal zone internals have changed, e.g. 
- * thermal cooling instance was added/removed - * @governor_list: node in thermal_governor_list (in thermal_core.c) - */ -struct thermal_governor { - char name[THERMAL_NAME_LENGTH]; - int (*bind_to_tz)(struct thermal_zone_device *tz); - void (*unbind_from_tz)(struct thermal_zone_device *tz); - int (*throttle)(struct thermal_zone_device *tz, - const struct thermal_trip *trip); - void (*update_tz)(struct thermal_zone_device *tz, - enum thermal_notify_event reason); - struct list_head governor_list; -}; - /* Structure to define Thermal Zone parameters */ struct thermal_zone_params { - char governor_name[THERMAL_NAME_LENGTH]; + const char *governor_name; /* * a boolean to indicate if the thermal to hwmon sysfs interface @@ -315,17 +217,16 @@ int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp); #ifdef CONFIG_THERMAL struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, - struct thermal_trip *trips, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp, int passive_delay, int polling_delay); struct thermal_zone_device *thermal_tripless_zone_device_register( const char *type, void *devdata, - struct thermal_zone_device_ops *ops, + const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp); void thermal_zone_device_unregister(struct thermal_zone_device *tz); @@ -375,10 +276,9 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz); #else static inline struct thermal_zone_device *thermal_zone_device_register_with_trips( const char *type, - struct thermal_trip *trips, - int num_trips, int mask, - void *devdata, - struct thermal_zone_device_ops *ops, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp, int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/threads.h b/include/linux/threads.h index c34173e6c5f1..1674a471b0b4 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -25,13 +25,13 @@ /* * This controls the default maximum pid allocated to a process */ -#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) +#define PID_MAX_DEFAULT (IS_ENABLED(CONFIG_BASE_SMALL) ? 0x1000 : 0x8000) /* * A maximum of 4 million PIDs should be enough for a while. * [NOTE: PID/TIDs are limited to 2^30 ~= 1 billion, see FUTEX_TID_MASK.] */ -#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ +#define PID_MAX_LIMIT (IS_ENABLED(CONFIG_BASE_SMALL) ? PAGE_SIZE * 8 : \ (sizeof(long) > 4 ? 
4 * 1024 * 1024 : PID_MAX_DEFAULT)) /* diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 2c835e5c41f6..7d902d8c054b 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -33,7 +33,6 @@ enum tb_cfg_pkg_type { TB_CFG_PKG_ICM_EVENT = 10, TB_CFG_PKG_ICM_CMD = 11, TB_CFG_PKG_ICM_RESP = 12, - TB_CFG_PKG_PREPARE_TO_SLEEP = 13, }; /** @@ -87,8 +86,8 @@ struct tb { }; extern const struct bus_type tb_bus_type; -extern struct device_type tb_service_type; -extern struct device_type tb_xdomain_type; +extern const struct device_type tb_service_type; +extern const struct device_type tb_xdomain_type; #define TB_LINKS_PER_PHY_PORT 2 diff --git a/include/linux/tick.h b/include/linux/tick.h index 716d17f31c45..4924a33700b7 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -12,6 +12,7 @@ #include <linux/cpumask.h> #include <linux/sched.h> #include <linux/rcupdate.h> +#include <linux/static_key.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void __init tick_init(void); @@ -19,16 +20,22 @@ extern void __init tick_init(void); extern void tick_suspend_local(void); /* Should be core only, but XEN resume magic and ARM BL switcher require it */ extern void tick_resume_local(void); -extern void tick_handover_do_timer(void); extern void tick_cleanup_dead_cpu(int cpu); #else /* CONFIG_GENERIC_CLOCKEVENTS */ static inline void tick_init(void) { } static inline void tick_suspend_local(void) { } static inline void tick_resume_local(void) { } -static inline void tick_handover_do_timer(void) { } static inline void tick_cleanup_dead_cpu(int cpu) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU) +extern int tick_cpu_dying(unsigned int cpu); +extern void tick_assert_timekeeping_handover(void); +#else +#define tick_cpu_dying NULL +static inline void tick_assert_timekeeping_handover(void) { } +#endif + #if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND) extern void tick_freeze(void); extern void tick_unfreeze(void); @@ -63,18 +70,14 @@ enum tick_broadcast_state { TICK_BROADCAST_ENTER, }; +extern struct static_key_false arch_needs_tick_broadcast; + #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_control(enum tick_broadcast_mode mode); #else static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU) -extern void tick_offline_cpu(unsigned int cpu); -#else -static inline void tick_offline_cpu(unsigned int cpu) { } -#endif - #ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); #else @@ -164,9 +167,16 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #endif /* !CONFIG_NO_HZ_COMMON */ +/* + * Mask of CPUs that are nohz_full. + * + * Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu() + * check. 
+ */ +extern cpumask_var_t tick_nohz_full_mask; + #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; -extern cpumask_var_t tick_nohz_full_mask; static inline bool tick_nohz_full_enabled(void) { diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index c6540ceea143..0982d1d52b24 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -22,7 +22,7 @@ * * @read: returns the current cycle value * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, + * subtraction of non-64-bit counters, * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) @@ -35,7 +35,7 @@ struct cyclecounter { }; /** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * struct timecounter - layer above a &struct cyclecounter which counts nanoseconds * Contains the state needed by timecounter_read() to detect * cycle counter wrap around. Initialize with * timecounter_init(). Also used to convert cycle counts into the @@ -66,6 +66,8 @@ struct timecounter { * @cycles: Cycles * @mask: bit mask for maintaining the 'frac' field * @frac: pointer to storage for the fractional nanoseconds. + * + * Returns: cycle counter cycles converted to nanoseconds */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, u64 cycles, u64 mask, u64 *frac) @@ -79,6 +81,7 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, /** * timecounter_adjtime - Shifts the time of the clock. + * @tc: The &struct timecounter to adjust * @delta: Desired change in nanoseconds. */ static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) @@ -107,6 +110,8 @@ extern void timecounter_init(struct timecounter *tc, * * In other words, keeps track of time since the same epoch as * the function which generated the initial time stamp. + * + * Returns: nanoseconds since the initial time stamp */ extern u64 timecounter_read(struct timecounter *tc); @@ -123,6 +128,8 @@ extern u64 timecounter_read(struct timecounter *tc); * * This allows conversion of cycle counter values which were generated * in the past. + * + * Returns: cycle counter converted to nanoseconds since the initial time stamp */ extern u64 timecounter_cyc2time(const struct timecounter *tc, u64 cycle_tstamp); diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7c43e98cf211..0ea7823b7f31 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -22,14 +22,14 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* - * ktime_get() family: read the current time in a multitude of ways, + * ktime_get() family - read the current time in a multitude of ways. * * The default time reference is CLOCK_MONOTONIC, starting at * boot time but not counting the time spent in suspend. * For other references, use the functions with "real", "clocktai", * "boottime" and "raw" suffixes. * - * To get the time in a different format, use the ones wit + * To get the time in a different format, use the ones with * "ns", "ts64" and "seconds" suffix. * * See Documentation/core-api/timekeeping.rst for more details. 
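The ktime_get() family kdoc above encodes the clock reference in the function name ("real", "boottime", "clocktai", "raw", default CLOCK_MONOTONIC) and the return format in the suffix ("ns", "ts64", "seconds"). A minimal sketch of that convention in use (demo_timestamps() and its variables are hypothetical, illustrative only):

#include <linux/ktime.h>
#include <linux/timekeeping.h>

/* Time an operation on CLOCK_MONOTONIC and stamp it with wall time. */
static void demo_timestamps(void)
{
        struct timespec64 wall;
        ktime_t t0, t1;

        t0 = ktime_get();               /* monotonic clock, ktime_t format */
        /* ... the work being measured ... */
        t1 = ktime_get();

        ktime_get_real_ts64(&wall);     /* "real" reference, "ts64" format */
        pr_info("took %lld ns at %lld s\n",
                ktime_to_ns(ktime_sub(t1, t0)), (s64)wall.tv_sec);
}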
@@ -74,6 +74,8 @@ extern u32 ktime_get_resolution_ns(void); /** * ktime_get_real - get the real (wall-) time in ktime_t format + * + * Returns: real (wall) time in ktime_t format */ static inline ktime_t ktime_get_real(void) { @@ -86,10 +88,12 @@ static inline ktime_t ktime_get_coarse_real(void) } /** - * ktime_get_boottime - Returns monotonic time since boot in ktime_t format + * ktime_get_boottime - Get monotonic time since boot in ktime_t format * * This is similar to CLOCK_MONTONIC/ktime_get, but also includes the * time spent in suspend. + * + * Returns: monotonic time since boot in ktime_t format */ static inline ktime_t ktime_get_boottime(void) { @@ -102,7 +106,9 @@ static inline ktime_t ktime_get_coarse_boottime(void) } /** - * ktime_get_clocktai - Returns the TAI time of day in ktime_t format + * ktime_get_clocktai - Get the TAI time of day in ktime_t format + * + * Returns: the TAI time of day in ktime_t format */ static inline ktime_t ktime_get_clocktai(void) { @@ -144,32 +150,60 @@ static inline u64 ktime_get_coarse_clocktai_ns(void) /** * ktime_mono_to_real - Convert monotonic time to clock realtime + * @mono: monotonic time to convert + * + * Returns: time converted to realtime clock */ static inline ktime_t ktime_mono_to_real(ktime_t mono) { return ktime_mono_to_any(mono, TK_OFFS_REAL); } +/** + * ktime_get_ns - Get the current time in nanoseconds + * + * Returns: current time converted to nanoseconds + */ static inline u64 ktime_get_ns(void) { return ktime_to_ns(ktime_get()); } +/** + * ktime_get_real_ns - Get the current real/wall time in nanoseconds + * + * Returns: current real time converted to nanoseconds + */ static inline u64 ktime_get_real_ns(void) { return ktime_to_ns(ktime_get_real()); } +/** + * ktime_get_boottime_ns - Get the monotonic time since boot in nanoseconds + * + * Returns: current boottime converted to nanoseconds + */ static inline u64 ktime_get_boottime_ns(void) { return ktime_to_ns(ktime_get_boottime()); } +/** + * ktime_get_clocktai_ns - Get the current TAI time of day in nanoseconds + * + * Returns: current TAI time converted to nanoseconds + */ static inline u64 ktime_get_clocktai_ns(void) { return ktime_to_ns(ktime_get_clocktai()); } +/** + * ktime_get_raw_ns - Get the raw monotonic time in nanoseconds + * + * Returns: current raw monotonic time converted to nanoseconds + */ static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); @@ -224,8 +258,8 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* - * struct ktime_timestanps - Simultaneous mono/boot/real timestamps +/** + * struct ktime_timestamps - Simultaneous mono/boot/real timestamps * @mono: Monotonic timestamp * @boot: Boottime timestamp * @real: Realtime timestamp @@ -242,7 +276,8 @@ struct ktime_timestamps { * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time - * @clock_was_set_seq: The sequence number of clock was set events + * @cs_id: Clocksource ID + * @clock_was_set_seq: The sequence number of clock-was-set events * @cs_was_changed_seq: The sequence number of clocksource change events */ struct system_time_snapshot { @@ -268,15 +303,17 @@ struct system_device_crosststamp { }; /** - * struct system_counterval_t - system counter value with the pointer to the + * struct system_counterval_t - system counter value with the ID of the * corresponding clocksource * @cycles: System counter value - * @cs: Clocksource 
corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * @cs_id: Clocksource ID corresponding to system counter value. Used by + * timekeeping code to verify comparability of two cycle values. + * The default ID, CSID_GENERIC, does not identify a specific + * clocksource. */ struct system_counterval_t { u64 cycles; - struct clocksource *cs; + enum clocksource_ids cs_id; }; /* diff --git a/include/linux/timer.h b/include/linux/timer.h index f18a2f1eb79e..e67ecd1cbc97 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -22,7 +22,7 @@ #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif -/** +/* * @TIMER_DEFERRABLE: A deferrable timer will work normally when the * system is busy, but will not cause a CPU to come out of idle just * to service it; instead, the timer will be serviced when the CPU @@ -36,16 +36,10 @@ * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! * - * @TIMER_PINNED: A pinned timer will not be affected by any timer - * placement heuristics (like, NOHZ) and will always expire on the CPU - * on which the timer was enqueued. - * - * Note: Because enqueuing of timers can migrate the timer from one - * CPU to another, pinned timers are not guaranteed to stay on the - * initialy selected CPU. They move to the CPU on which the enqueue - * function is invoked via mod_timer() or add_timer(). If the timer - * should be placed on a particular CPU, then add_timer_on() has to be - * used. + * @TIMER_PINNED: A pinned timer will always expire on the CPU on which the + * timer was enqueued. When a particular CPU is required, add_timer_on() + * has to be used. Enqueue via mod_timer() and add_timer() is always done + * on the local CPU. */ #define TIMER_CPUMASK 0x0003FFFF #define TIMER_MIGRATING 0x00040000 @@ -146,7 +140,7 @@ static inline void destroy_timer_on_stack(struct timer_list *timer) { } * or not. Callers must ensure serialization wrt. other operations done * to this timer, eg. interrupt contexts, or other CPUs on SMP. * - * return value: 1 if the timer is pending, 0 if not. + * Returns: 1 if the timer is pending, 0 if not. */ static inline int timer_pending(const struct timer_list * timer) { @@ -165,6 +159,8 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); +extern void add_timer_local(struct timer_list *timer); +extern void add_timer_global(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); @@ -179,6 +175,10 @@ extern int timer_shutdown(struct timer_list *timer); * See timer_delete_sync() for detailed explanation. * * Do not use in new code. Use timer_delete_sync() instead. + * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer_sync(struct timer_list *timer) { @@ -192,6 +192,10 @@ static inline int del_timer_sync(struct timer_list *timer) * See timer_delete() for detailed explanation. * * Do not use in new code. Use timer_delete() instead. 
+ * + * Returns: + * * %0 - The timer was not pending + * * %1 - The timer was pending and deactivated */ static inline int del_timer(struct timer_list *timer) { diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 62973f7d4610..d306d9dd2207 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -37,11 +37,6 @@ static inline bool timerqueue_node_queued(struct timerqueue_node *node) return !RB_EMPTY_NODE(&node->node); } -static inline bool timerqueue_node_expires(struct timerqueue_node *node) -{ - return node->expires; -} - static inline void timerqueue_init_head(struct timerqueue_head *head) { head->rb_root = RB_ROOT_CACHED; diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 4ee9d13749ad..c17e4efbb2e5 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -23,6 +23,7 @@ #include <linux/fs.h> #include <linux/highmem.h> #include <crypto/hash_info.h> +#include <crypto/aes.h> #define TPM_DIGEST_SIZE 20 /* Max TPM v1.2 PCR size */ #define TPM_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE @@ -30,17 +31,28 @@ struct tpm_chip; struct trusted_key_payload; struct trusted_key_options; +/* opaque structure, holds auth session parameters like the session key */ +struct tpm2_auth; + +enum tpm2_session_types { + TPM2_SE_HMAC = 0x00, + TPM2_SE_POLICY = 0x01, + TPM2_SE_TRIAL = 0x02, +}; /* if you add a new hash to this, increment TPM_MAX_HASHES below */ enum tpm_algorithms { TPM_ALG_ERROR = 0x0000, TPM_ALG_SHA1 = 0x0004, + TPM_ALG_AES = 0x0006, TPM_ALG_KEYEDHASH = 0x0008, TPM_ALG_SHA256 = 0x000B, TPM_ALG_SHA384 = 0x000C, TPM_ALG_SHA512 = 0x000D, TPM_ALG_NULL = 0x0010, TPM_ALG_SM3_256 = 0x0012, + TPM_ALG_ECC = 0x0023, + TPM_ALG_CFB = 0x0043, }; /* @@ -49,6 +61,11 @@ enum tpm_algorithms { */ #define TPM_MAX_HASHES 5 +enum tpm2_curves { + TPM2_ECC_NONE = 0x0000, + TPM2_ECC_NIST_P256 = 0x0003, +}; + struct tpm_digest { u16 alg_id; u8 digest[TPM_MAX_DIGEST_SIZE]; @@ -116,6 +133,20 @@ struct tpm_chip_seqops { const struct seq_operations *seqops; }; +/* fixed define for the curve we use which is NIST_P256 */ +#define EC_PT_SZ 32 + +/* + * fixed define for the size of a name. 
This is actually HASHALG size + * plus 2, so 32 for SHA256 + */ +#define TPM2_NAME_SIZE 34 + +/* + * The maximum size for an object context + */ +#define TPM2_MAX_CONTEXT_SIZE 4096 + struct tpm_chip { struct device dev; struct device devs; @@ -170,6 +201,18 @@ struct tpm_chip { /* active locality */ int locality; + +#ifdef CONFIG_TCG_TPM2_HMAC + /* details for communication security via sessions */ + + /* saved context for NULL seed */ + u8 null_key_context[TPM2_MAX_CONTEXT_SIZE]; + /* name of NULL seed */ + u8 null_key_name[TPM2_NAME_SIZE]; + u8 null_ec_key_x[EC_PT_SZ]; + u8 null_ec_key_y[EC_PT_SZ]; + struct tpm2_auth *auth; +#endif }; #define TPM_HEADER_SIZE 10 @@ -194,6 +237,7 @@ enum tpm2_timeouts { enum tpm2_structures { TPM2_ST_NO_SESSIONS = 0x8001, TPM2_ST_SESSIONS = 0x8002, + TPM2_ST_CREATION = 0x8021, }; /* Indicates from what layer of the software stack the error comes from */ @@ -204,6 +248,7 @@ enum tpm2_return_codes { TPM2_RC_SUCCESS = 0x0000, TPM2_RC_HASH = 0x0083, /* RC_FMT1 */ TPM2_RC_HANDLE = 0x008B, + TPM2_RC_INTEGRITY = 0x009F, TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */ TPM2_RC_FAILURE = 0x0101, TPM2_RC_DISABLED = 0x0120, @@ -231,6 +276,8 @@ enum tpm2_command_codes { TPM2_CC_CONTEXT_LOAD = 0x0161, TPM2_CC_CONTEXT_SAVE = 0x0162, TPM2_CC_FLUSH_CONTEXT = 0x0165, + TPM2_CC_READ_PUBLIC = 0x0173, + TPM2_CC_START_AUTH_SESS = 0x0176, TPM2_CC_VERIFY_SIGNATURE = 0x0177, TPM2_CC_GET_CAPABILITY = 0x017A, TPM2_CC_GET_RANDOM = 0x017B, @@ -243,9 +290,25 @@ enum tpm2_command_codes { }; enum tpm2_permanent_handles { + TPM2_RH_NULL = 0x40000007, TPM2_RS_PW = 0x40000009, }; +/* Most Significant Octet for key types */ +enum tpm2_mso_type { + TPM2_MSO_NVRAM = 0x01, + TPM2_MSO_SESSION = 0x02, + TPM2_MSO_POLICY = 0x03, + TPM2_MSO_PERMANENT = 0x40, + TPM2_MSO_VOLATILE = 0x80, + TPM2_MSO_PERSISTENT = 0x81, +}; + +static inline enum tpm2_mso_type tpm2_handle_mso(u32 handle) +{ + return handle >> 24; +} + enum tpm2_capabilities { TPM2_CAP_HANDLES = 1, TPM2_CAP_COMMANDS = 2, @@ -284,6 +347,7 @@ enum tpm_chip_flags { TPM_CHIP_FLAG_FIRMWARE_UPGRADE = BIT(7), TPM_CHIP_FLAG_SUSPENDED = BIT(8), TPM_CHIP_FLAG_HWRNG_DISABLED = BIT(9), + TPM_CHIP_FLAG_DISABLE = BIT(10), }; #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev) @@ -297,28 +361,61 @@ struct tpm_header { }; } __packed; -/* A string buffer type for constructing TPM commands. This is based on the - * ideas of string buffer code in security/keys/trusted.h but is heap based - * in order to keep the stack usage minimal. - */ - enum tpm_buf_flags { + /* the capacity exceeded: */ TPM_BUF_OVERFLOW = BIT(0), + /* TPM2B format: */ + TPM_BUF_TPM2B = BIT(1), + /* read out of boundary: */ + TPM_BUF_BOUNDARY_ERROR = BIT(2), }; +/* + * A string buffer type for constructing TPM commands. + */ struct tpm_buf { - unsigned int flags; + u32 flags; + u32 length; u8 *data; + u8 handles; }; enum tpm2_object_attributes { TPM2_OA_FIXED_TPM = BIT(1), + TPM2_OA_ST_CLEAR = BIT(2), TPM2_OA_FIXED_PARENT = BIT(4), + TPM2_OA_SENSITIVE_DATA_ORIGIN = BIT(5), TPM2_OA_USER_WITH_AUTH = BIT(6), + TPM2_OA_ADMIN_WITH_POLICY = BIT(7), + TPM2_OA_NO_DA = BIT(10), + TPM2_OA_ENCRYPTED_DUPLICATION = BIT(11), + TPM2_OA_RESTRICTED = BIT(16), + TPM2_OA_DECRYPT = BIT(17), + TPM2_OA_SIGN = BIT(18), }; +/* + * definitions for the canonical template. 
These are mandated + * by the TCG key template documents + */ + +#define AES_KEY_BYTES AES_KEYSIZE_128 +#define AES_KEY_BITS (AES_KEY_BYTES*8) +#define TPM2_OA_TMPL (TPM2_OA_NO_DA | \ + TPM2_OA_FIXED_TPM | \ + TPM2_OA_FIXED_PARENT | \ + TPM2_OA_SENSITIVE_DATA_ORIGIN | \ + TPM2_OA_USER_WITH_AUTH | \ + TPM2_OA_DECRYPT | \ + TPM2_OA_RESTRICTED) + enum tpm2_session_attributes { TPM2_SA_CONTINUE_SESSION = BIT(0), + TPM2_SA_AUDIT_EXCLUSIVE = BIT(1), + TPM2_SA_AUDIT_RESET = BIT(3), + TPM2_SA_DECRYPT = BIT(5), + TPM2_SA_ENCRYPT = BIT(6), + TPM2_SA_AUDIT = BIT(7), }; struct tpm2_hash { @@ -326,84 +423,21 @@ struct tpm2_hash { unsigned int tpm_id; }; -static inline void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - - head->tag = cpu_to_be16(tag); - head->length = cpu_to_be32(sizeof(*head)); - head->ordinal = cpu_to_be32(ordinal); -} - -static inline int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal) -{ - buf->data = (u8 *)__get_free_page(GFP_KERNEL); - if (!buf->data) - return -ENOMEM; - - buf->flags = 0; - tpm_buf_reset(buf, tag, ordinal); - return 0; -} - -static inline void tpm_buf_destroy(struct tpm_buf *buf) -{ - free_page((unsigned long)buf->data); -} - -static inline u32 tpm_buf_length(struct tpm_buf *buf) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - - return be32_to_cpu(head->length); -} - -static inline u16 tpm_buf_tag(struct tpm_buf *buf) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - - return be16_to_cpu(head->tag); -} - -static inline void tpm_buf_append(struct tpm_buf *buf, - const unsigned char *new_data, - unsigned int new_len) -{ - struct tpm_header *head = (struct tpm_header *)buf->data; - u32 len = tpm_buf_length(buf); - - /* Return silently if overflow has already happened. */ - if (buf->flags & TPM_BUF_OVERFLOW) - return; - - if ((len + new_len) > PAGE_SIZE) { - WARN(1, "tpm_buf: overflow\n"); - buf->flags |= TPM_BUF_OVERFLOW; - return; - } - - memcpy(&buf->data[len], new_data, new_len); - head->length = cpu_to_be32(len + new_len); -} - -static inline void tpm_buf_append_u8(struct tpm_buf *buf, const u8 value) -{ - tpm_buf_append(buf, &value, 1); -} - -static inline void tpm_buf_append_u16(struct tpm_buf *buf, const u16 value) -{ - __be16 value2 = cpu_to_be16(value); - - tpm_buf_append(buf, (u8 *) &value2, 2); -} - -static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value) -{ - __be32 value2 = cpu_to_be32(value); - - tpm_buf_append(buf, (u8 *) &value2, 4); -} +int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal); +void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal); +int tpm_buf_init_sized(struct tpm_buf *buf); +void tpm_buf_reset_sized(struct tpm_buf *buf); +void tpm_buf_destroy(struct tpm_buf *buf); +u32 tpm_buf_length(struct tpm_buf *buf); +void tpm_buf_append(struct tpm_buf *buf, const u8 *new_data, u16 new_length); +void tpm_buf_append_u8(struct tpm_buf *buf, const u8 value); +void tpm_buf_append_u16(struct tpm_buf *buf, const u16 value); +void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value); +u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset); +u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset); +u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset); + +u8 *tpm_buf_parameters(struct tpm_buf *buf); /* * Check if TPM device is in the firmware upgrade mode. 
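This hunk turns the page-backed tpm_buf helpers into out-of-line functions and adds a length field, TPM2B ("sized") buffers and bounded read accessors. A minimal sketch of the command-construction pattern the API supports, using the TPM2_CC_GET_RANDOM ordinal defined earlier in this header (demo_build_get_random() is a hypothetical name; error handling trimmed):

#include <linux/tpm.h>

/* Build a TPM2 GetRandom command asking for 16 bytes. */
static int demo_build_get_random(struct tpm_buf *buf)
{
        int rc;

        rc = tpm_buf_init(buf, TPM2_ST_NO_SESSIONS, TPM2_CC_GET_RANDOM);
        if (rc)
                return rc;

        tpm_buf_append_u16(buf, 16);    /* bytesRequested */

        /* buf->data now holds tpm_buf_length(buf) bytes ready to
         * transmit; the caller frees with tpm_buf_destroy(buf). */
        return 0;
}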
@@ -415,7 +449,7 @@ static inline bool tpm_is_firmware_upgrade(struct tpm_chip *chip) static inline u32 tpm2_rc_value(u32 rc) { - return (rc & BIT(7)) ? rc & 0xff : rc; + return (rc & BIT(7)) ? rc & 0xbf : rc; } #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) @@ -429,10 +463,19 @@ extern int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, struct tpm_digest *digest); extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, struct tpm_digest *digests); -extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen); extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max); extern struct tpm_chip *tpm_default_chip(void); void tpm2_flush_context(struct tpm_chip *chip, u32 handle); + +static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle) +{ + /* simple authorization for empty auth */ + tpm_buf_append_u32(buf, 9); /* total length of auth */ + tpm_buf_append_u32(buf, handle); + tpm_buf_append_u16(buf, 0); /* nonce len */ + tpm_buf_append_u8(buf, 0); /* attributes */ + tpm_buf_append_u16(buf, 0); /* hmac len */ +} #else static inline int tpm_is_tpm2(struct tpm_chip *chip) { @@ -450,10 +493,6 @@ static inline int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, return -ENODEV; } -static inline int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen) -{ - return -ENODEV; -} static inline int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max) { return -ENODEV; @@ -463,5 +502,102 @@ static inline struct tpm_chip *tpm_default_chip(void) { return NULL; } + +static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle) +{ +} #endif +#ifdef CONFIG_TCG_TPM2_HMAC + +int tpm2_start_auth_session(struct tpm_chip *chip); +void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, + u32 handle, u8 *name); +void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, + u8 attributes, u8 *passphrase, + int passphraselen); +static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, + struct tpm_buf *buf, + u8 attributes, + u8 *passphrase, + int passphraselen) +{ + tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, + passphraselen); +} +void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); +int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, + int rc); +void tpm2_end_auth_session(struct tpm_chip *chip); +#else +#include <asm/unaligned.h> + +static inline int tpm2_start_auth_session(struct tpm_chip *chip) +{ + return 0; +} +static inline void tpm2_end_auth_session(struct tpm_chip *chip) +{ +} +static inline void tpm_buf_append_name(struct tpm_chip *chip, + struct tpm_buf *buf, + u32 handle, u8 *name) +{ + tpm_buf_append_u32(buf, handle); + /* count the number of handles in the upper bits of flags */ + buf->handles++; +} +static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, + struct tpm_buf *buf, + u8 attributes, u8 *passphrase, + int passphraselen) +{ + /* offset tells us where the sessions area begins */ + int offset = buf->handles * 4 + TPM_HEADER_SIZE; + u32 len = 9 + passphraselen; + + if (tpm_buf_length(buf) != offset) { + /* not the first session so update the existing length */ + len += get_unaligned_be32(&buf->data[offset]); + put_unaligned_be32(len, &buf->data[offset]); + } else { + tpm_buf_append_u32(buf, len); + } + /* auth handle */ + tpm_buf_append_u32(buf, TPM2_RS_PW); + /* nonce */ + tpm_buf_append_u16(buf, 0); + /* attributes */ + tpm_buf_append_u8(buf, 0); + /* passphrase */ + 
tpm_buf_append_u16(buf, passphraselen); + tpm_buf_append(buf, passphrase, passphraselen); +} +static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, + struct tpm_buf *buf, + u8 attributes, + u8 *passphrase, + int passphraselen) +{ + int offset = buf->handles * 4 + TPM_HEADER_SIZE; + struct tpm_header *head = (struct tpm_header *) buf->data; + + /* + * if the only sessions are optional, the command tag + * must change to TPM2_ST_NO_SESSIONS + */ + if (tpm_buf_length(buf) == offset) + head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); +} +static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, + struct tpm_buf *buf) +{ +} +static inline int tpm_buf_check_hmac_response(struct tpm_chip *chip, + struct tpm_buf *buf, + int rc) +{ + return rc; +} +#endif /* CONFIG_TCG_TPM2_HMAC */ + #endif diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d68ff9b1247f..9df3e2973626 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -17,6 +17,9 @@ struct dentry; struct bpf_prog; union bpf_attr; +/* Used for event string fields when they are NULL */ +#define EVENT_NULL_STR "(null)" + const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -103,13 +106,16 @@ struct trace_iterator { unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; - long wait_index; + atomic_t wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; cpumask_var_t started; + /* Set when the file is closed to prevent new waiters */ + bool closed; + /* it's true when current open file is snapshot */ bool snapshot; @@ -759,8 +765,11 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); -int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); -int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); + +struct bpf_raw_tp_link; +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); + struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, @@ -788,11 +797,12 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } -static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p) +struct bpf_raw_tp_link; +static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } -static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p) +static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } @@ -903,31 +913,31 @@ void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_free_bpf_prog(struct perf_event *event); -void bpf_trace_run1(struct bpf_prog *prog, u64 arg1); -void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 
arg2); -void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run1(struct bpf_raw_tp_link *link, u64 arg1); +void bpf_trace_run2(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2); +void bpf_trace_run3(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3); -void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run4(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4); -void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run5(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5); -void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run6(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6); -void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run7(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); -void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run8(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8); -void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run9(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9); -void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run10(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10); -void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run11(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11); -void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run12(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index d48cd92d2364..24ea8ac049b4 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -135,7 +135,7 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif -#ifdef CONFIG_ARCH_WANTS_NO_INSTR +#ifdef CONFIG_FTRACE_VALIDATE_RCU_IS_WATCHING # define trace_warn_on_no_rcu(ip) \ ({ \ bool __ret = !rcu_is_watching(); \ diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h index 7a5fe17b6bf9..d03f74658716 100644 --- a/include/linux/tracefs.h +++ b/include/linux/tracefs.h @@ -62,6 +62,8 @@ struct eventfs_file; typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, const struct file_operations **fops); +typedef void (*eventfs_release)(const char *name, void *data); + /** * struct eventfs_entry - dynamically created eventfs file call back handler * @name: Then name of the dynamic file in an eventfs directory @@ -72,6 +74,7 @@ typedef int (*eventfs_callback)(const char *name, umode_t *mode, void **data, struct eventfs_entry { const char *name; eventfs_callback callback; + eventfs_release release; }; struct eventfs_inode; diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88c0ba623ee6..689b6d71590e 100644 --- a/include/linux/tracepoint.h +++ 
b/include/linux/tracepoint.h @@ -199,7 +199,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) if (!(cond)) \ return; \ \ - if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle))) \ + if (WARN_ONCE(RCUIDLE_COND(rcuidle), \ + "Bad RCU usage for tracepoint")) \ return; \ \ /* keep srcu and sched-rcu usage consistent */ \ @@ -259,7 +260,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) TP_ARGS(args), \ TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - WARN_ON_ONCE(!rcu_is_watching()); \ + WARN_ONCE(!rcu_is_watching(), \ + "RCU not watching for tracepoint"); \ } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ diff --git a/include/linux/tty.h b/include/linux/tty.h index 8c76fd97d4ad..2372f9357240 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -6,7 +6,6 @@ #include <linux/major.h> #include <linux/termios.h> #include <linux/workqueue.h> -#include <linux/tty_buffer.h> #include <linux/tty_driver.h> #include <linux/tty_ldisc.h> #include <linux/tty_port.h> @@ -146,15 +145,12 @@ struct tty_operations; * @count: count of open processes, reaching zero cancels all the work for * this tty and drops a @kref too (but does not free this tty) * @winsize: size of the terminal "window" (cf. @winsize_mutex) - * @flow: flow settings grouped together, see also @flow.unused + * @flow: flow settings grouped together * @flow.lock: lock for @flow members * @flow.stopped: tty stopped/started by stop_tty()/start_tty() * @flow.tco_stopped: tty stopped/started by %TCOOFF/%TCOON ioctls (it has * precedence over @flow.stopped) - * @flow.unused: alignment for Alpha, so that no members other than @flow.* are - * modified by the same 64b word store. The @flow's __aligned is - * there for the very same reason. - * @ctrl: control settings grouped together, see also @ctrl.unused + * @ctrl: control settings grouped together * @ctrl.lock: lock for @ctrl members * @ctrl.pgrp: process group of this tty (setpgrp(2)) * @ctrl.session: session of this tty (setsid(2)). Writes are protected by both @@ -162,7 +158,6 @@ struct tty_operations; * them. * @ctrl.pktstatus: packet mode status (bitwise OR of %TIOCPKT_ constants) * @ctrl.packet: packet mode enabled - * @ctrl.unused: alignment for Alpha, see @flow.unused for explanation * @hw_stopped: not controlled by the tty layer, under @driver's control for CTS * handling * @receive_room: bytes permitted to feed to @ldisc without any being lost @@ -217,8 +212,7 @@ struct tty_struct { spinlock_t lock; bool stopped; bool tco_stopped; - unsigned long unused[0]; - } __aligned(sizeof(unsigned long)) flow; + } flow; struct { struct pid *pgrp; @@ -226,8 +220,7 @@ struct tty_struct { spinlock_t lock; unsigned char pktstatus; bool packet; - unsigned long unused[0]; - } __aligned(sizeof(unsigned long)) ctrl; + } ctrl; bool hw_stopped; bool closing; diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 7372124fbf90..dd4b31ce6d5d 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -154,6 +154,13 @@ struct serial_struct; * * Optional. Called under the @tty->termios_rwsem. May sleep. * + * @ldisc_ok: ``int ()(struct tty_struct *tty, int ldisc)`` + * + * This routine allows the @tty driver to decide if it can deal + * with a particular @ldisc. + * + * Optional. Called under the @tty->ldisc_sem and @tty->termios_rwsem. 
+ * * @set_ldisc: ``void ()(struct tty_struct *tty)`` * * This routine allows the @tty driver to be notified when the device's @@ -372,6 +379,7 @@ struct tty_operations { void (*hangup)(struct tty_struct *tty); int (*break_ctl)(struct tty_struct *tty, int state); void (*flush_buffer)(struct tty_struct *tty); + int (*ldisc_ok)(struct tty_struct *tty, int ldisc); void (*set_ldisc)(struct tty_struct *tty); void (*wait_until_sent)(struct tty_struct *tty, int timeout); void (*send_xchar)(struct tty_struct *tty, u8 ch); diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index ffe48e69b3f3..457879938fc1 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p) p->v++; } -static inline void u64_stats_init(struct u64_stats_sync *syncp) -{ - seqcount_init(&syncp->seq); -} +#define u64_stats_init(syncp) \ + do { \ + struct u64_stats_sync *__s = (syncp); \ + seqcount_init(&__s->seq); \ + } while (0) static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { diff --git a/include/linux/udp.h b/include/linux/udp.h index d04188714dca..3eb3f2b9a2a0 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -24,7 +24,7 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) } #define UDP_HTABLE_SIZE_MIN_PERNET 128 -#define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) +#define UDP_HTABLE_SIZE_MIN (IS_ENABLED(CONFIG_BASE_SMALL) ? 128 : 256) #define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) @@ -92,6 +92,9 @@ struct udp_sock { /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ int forward_threshold; + + /* Cache friendly copy of sk->sk_peek_off >= 0 */ + bool peeking_with_offset; }; #define udp_test_bit(nr, sk) \ @@ -105,10 +108,17 @@ struct udp_sock { #define udp_assign_bit(nr, sk, val) \ assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val) -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define UDP_MAX_SEGMENTS (1 << 7UL) #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk) +static inline int udp_set_peek_off(struct sock *sk, int val) +{ + sk_set_peek_off(sk, val); + WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0); + return 0; +} + static inline void udp_set_no_check6_tx(struct sock *sk, bool val) { udp_assign_bit(NO_CHECK6_TX, sk, val); @@ -140,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, } } +DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); +#if IS_ENABLED(CONFIG_IPV6) +DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); +#endif + +static inline bool udp_encap_needed(void) +{ + if (static_branch_unlikely(&udp_encap_needed_key)) + return true; + +#if IS_ENABLED(CONFIG_IPV6) + if (static_branch_unlikely(&udpv6_encap_needed_key)) + return true; +#endif + + return false; +} + static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) { if (!skb_is_gso(skb)) @@ -153,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) !udp_test_bit(ACCEPT_FRAGLIST, sk)) return true; + /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still + * land in a tunnel as the socket check in udp_gro_receive cannot be + * foolproof. 
+ */ + if (udp_encap_needed() && + READ_ONCE(udp_sk(sk)->encap_rcv) && + !(skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM))) + return true; + return false; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 00cebe2b70de..7020adedfa08 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -206,6 +206,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) } static __always_inline __must_check +bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i) +{ + size_t copied = copy_to_iter(addr, bytes, i); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} + +static __always_inline __must_check bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { size_t copied = copy_from_iter(addr, bytes, i); diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 47c5962b876b..18238dc8bfd3 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -28,19 +28,26 @@ struct uio_map; * logical, virtual, or physical & phys_addr_t * should always be large enough to handle any of * the address types) + * @dma_addr: DMA handle set by dma_alloc_coherent, used with + * UIO_MEM_DMA_COHERENT only (@addr should be the + * void * returned from the same dma_alloc_coherent call) * @offs: offset of device memory within the page * @size: size of IO (multiple of page size) * @memtype: type of memory addr points to * @internal_addr: ioremap-ped version of addr, for driver internal use + * @dma_device: device struct that was passed to dma_alloc_coherent, + * used with UIO_MEM_DMA_COHERENT only * @map: for use by the UIO core only. */ struct uio_mem { const char *name; phys_addr_t addr; + dma_addr_t dma_addr; unsigned long offs; resource_size_t size; int memtype; void __iomem *internal_addr; + struct device *dma_device; struct uio_map *map; }; @@ -158,6 +165,12 @@ extern int __must_check #define UIO_MEM_LOGICAL 2 #define UIO_MEM_VIRTUAL 3 #define UIO_MEM_IOVA 4 +/* + * UIO_MEM_DMA_COHERENT exists for legacy drivers that had been getting by with + * improperly mapping DMA coherent allocations through the other modes. + * Do not use in new drivers. 
+ */ +#define UIO_MEM_DMA_COHERENT 5 /* defines for uio_port->porttype */ #define UIO_PORT_NONE 0 diff --git a/include/linux/units.h b/include/linux/units.h index 45110daaf8d3..00e15de33eca 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -24,10 +24,13 @@ #define NANOHZ_PER_HZ 1000000000UL #define MICROHZ_PER_HZ 1000000UL #define MILLIHZ_PER_HZ 1000UL + #define HZ_PER_KHZ 1000UL -#define KHZ_PER_MHZ 1000UL #define HZ_PER_MHZ 1000000UL +#define KHZ_PER_MHZ 1000UL +#define KHZ_PER_GHZ 1000000UL + #define MILLIWATT_PER_WATT 1000UL #define MICROWATT_PER_MILLIWATT 1000UL #define MICROWATT_PER_WATT 1000000UL diff --git a/include/linux/usb.h b/include/linux/usb.h index 9e52179872a5..1913a13833f2 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -440,11 +440,6 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size, /* ----------------------------------------------------------------------- */ -/* USB device number allocation bitmap */ -struct usb_devmap { - unsigned long devicemap[128 / (8*sizeof(unsigned long))]; -}; - /* * Allocated per bus (tree of devices) we have: */ @@ -472,7 +467,7 @@ struct usb_bus { * round-robin allocation */ struct mutex devnum_next_mutex; /* devnum_next mutex */ - struct usb_devmap devmap; /* device address allocation map */ + DECLARE_BITMAP(devmap, 128); /* USB device number allocation bitmap */ struct usb_device *root_hub; /* Root hub */ struct usb_bus *hs_companion; /* Companion EHCI bus, if any */ diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index ca796dc1a984..6e5555610010 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -82,7 +82,7 @@ struct uac_clock_source_descriptor { #define UAC_CLOCK_SOURCE_TYPE_INT_PROG 0x3 #define UAC_CLOCK_SOURCE_SYNCED_TO_SOF (1 << 2) -/* 4.7.2.2 Clock Source Descriptor */ +/* 4.7.2.2 Clock Selector Descriptor */ struct uac_clock_selector_descriptor { __u8 bLength; @@ -91,7 +91,7 @@ struct uac_clock_selector_descriptor { __u8 bClockID; __u8 bNrInPins; __u8 baCSourceID[]; - /* bmControls and iClockSource omitted */ + /* bmControls and iClockSelector omitted */ } __attribute__((packed)); /* 4.7.2.3 Clock Multiplier Descriptor */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 6532beb587b1..56dda8e1562d 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -52,6 +52,7 @@ struct usb_ep; * @short_not_ok: When reading data, makes short packets be * treated as errors (queue stops advancing till cleanup). * @dma_mapped: Indicates if request has been mapped to DMA (internal) + * @sg_was_mapped: Set if the scatterlist has been mapped before the request * @complete: Function called when request completes, so this request and * its buffer may be re-used. The function will always be called with * interrupts disabled, and it must not sleep. 
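The @complete kdoc above fixes the calling context: the callback runs with interrupts disabled and must not sleep. A minimal sketch of the usual allocate/fill/queue pattern that contract applies to (the demo_* names are hypothetical, illustrative only):

#include <linux/usb/gadget.h>

static void demo_complete(struct usb_ep *ep, struct usb_request *req)
{
        /* Runs with interrupts disabled; must not sleep. */
        if (req->status)
                pr_err("demo: transfer failed: %d\n", req->status);
}

static int demo_submit(struct usb_ep *ep, void *buf, unsigned int len)
{
        struct usb_request *req = usb_ep_alloc_request(ep, GFP_ATOMIC);

        if (!req)
                return -ENOMEM;

        req->buf = buf;
        req->length = len;
        req->complete = demo_complete;

        return usb_ep_queue(ep, req, GFP_ATOMIC);
}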
@@ -111,6 +112,7 @@ struct usb_request { unsigned zero:1; unsigned short_not_ok:1; unsigned dma_mapped:1; + unsigned sg_was_mapped:1; void (*complete)(struct usb_ep *ep, struct usb_request *req); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index cd77fc6095a1..ac95e7c89df5 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -55,7 +55,7 @@ struct giveback_urb_bh { bool high_prio; spinlock_t lock; struct list_head head; - struct tasklet_struct bh; + struct work_struct bh; struct usb_host_endpoint *completing_ep; }; diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 98487fd7ab11..de42f14bd280 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -6,6 +6,7 @@ #ifndef __LINUX_USB_OF_H #define __LINUX_USB_OF_H +#include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/usb/otg.h> #include <linux/usb/phy.h> @@ -17,6 +18,7 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); +enum usb_port_connect_type usb_of_get_connect_type(struct usb_device *hub, int port1); struct device_node *usb_of_get_device_node(struct usb_device *hub, int port1); bool usb_of_has_combined_node(struct usb_device *udev); struct device_node *usb_of_get_interface_node(struct usb_device *udev, @@ -37,6 +39,11 @@ static inline int of_usb_update_otg_caps(struct device_node *np, { return 0; } +static inline enum usb_port_connect_type +usb_of_get_connect_type(const struct usb_device *hub, int port1) +{ + return USB_PORT_CONNECT_TYPE_UNKNOWN; +} static inline struct device_node * usb_of_get_device_node(struct usb_device *hub, int port1) { diff --git a/include/linux/usb/onboard_dev.h b/include/linux/usb/onboard_dev.h new file mode 100644 index 000000000000..b79db6d193c8 --- /dev/null +++ b/include/linux/usb/onboard_dev.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_USB_ONBOARD_DEV_H +#define __LINUX_USB_ONBOARD_DEV_H + +struct usb_device; +struct list_head; + +#if IS_ENABLED(CONFIG_USB_ONBOARD_DEV) +void onboard_dev_create_pdevs(struct usb_device *parent_dev, struct list_head *pdev_list); +void onboard_dev_destroy_pdevs(struct list_head *pdev_list); +#else +static inline void onboard_dev_create_pdevs(struct usb_device *parent_dev, + struct list_head *pdev_list) {} +static inline void onboard_dev_destroy_pdevs(struct list_head *pdev_list) {} +#endif + +#endif /* __LINUX_USB_ONBOARD_DEV_H */ diff --git a/include/linux/usb/onboard_hub.h b/include/linux/usb/onboard_hub.h deleted file mode 100644 index d9373230556e..000000000000 --- a/include/linux/usb/onboard_hub.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __LINUX_USB_ONBOARD_HUB_H -#define __LINUX_USB_ONBOARD_HUB_H - -struct usb_device; -struct list_head; - -#if IS_ENABLED(CONFIG_USB_ONBOARD_HUB) -void onboard_hub_create_pdevs(struct usb_device *parent_hub, struct list_head *pdev_list); -void onboard_hub_destroy_pdevs(struct list_head *pdev_list); -#else -static inline void onboard_hub_create_pdevs(struct usb_device *parent_hub, - struct list_head *pdev_list) {} -static inline void onboard_hub_destroy_pdevs(struct list_head *pdev_list) {} -#endif - -#endif /* __LINUX_USB_ONBOARD_HUB_H */ diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index eb626af0e4e7..d50098fb16b5 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -483,6 +483,7 @@ static inline 
unsigned int rdo_max_power(u32 rdo) #define PD_T_BIST_CONT_MODE 50 /* 30 - 60 ms */ #define PD_T_SINK_TX 16 /* 16 - 20 ms */ #define PD_T_CHUNK_NOT_SUPP 42 /* 40 - 50 ms */ +#define PD_T_VCONN_STABLE 50 #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h index 3a747938cdab..5c48e8a81403 100644 --- a/include/linux/usb/pd_vdo.h +++ b/include/linux/usb/pd_vdo.h @@ -7,6 +7,7 @@ #define __LINUX_USB_PD_VDO_H #include "pd.h" +#include <linux/bitfield.h> /* * VDO : Vendor Defined Message Object @@ -86,12 +87,15 @@ * * Request is simply properly formatted SVDM header * - * Response is 4 data objects: + * Response is 4 data objects for Power Delivery 2.0 and Passive Cables for + * Power Delivery 3.0. Active Cables in Power Delivery 3.0 have 5 data objects. * [0] :: SVDM header * [1] :: Identity header * [2] :: Cert Stat VDO * [3] :: (Product | Cable) VDO + * [4] :: Cable VDO 1 * [4] :: AMA VDO + * [5] :: Cable VDO 2 * */ #define VDO_INDEX_HDR 0 @@ -100,6 +104,8 @@ #define VDO_INDEX_CABLE 3 #define VDO_INDEX_PRODUCT 3 #define VDO_INDEX_AMA 4 +#define VDO_INDEX_CABLE_1 4 +#define VDO_INDEX_CABLE_2 5 /* * SVDM Identity Header @@ -150,6 +156,7 @@ #define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26)) #define PD_IDH_DFP_PTYPE(vdo) (((vdo) >> 23) & 0x7) #define PD_IDH_CONN_TYPE(vdo) (((vdo) >> 21) & 0x3) +#define PD_IDH_HOST_SUPP(vdo) ((vdo) & (1 << 31)) /* * Cert Stat VDO @@ -182,7 +189,7 @@ * <5:3> :: Alternate modes * <2:0> :: USB highest speed */ -#define PD_VDO_UFP_DEVCAP(vdo) (((vdo) & GENMASK(27, 24)) >> 24) +#define PD_VDO_UFP_DEVCAP(vdo) FIELD_GET(GENMASK(27, 24), vdo) /* UFP VDO Version */ #define UFP_VDO_VER1_2 2 @@ -241,7 +248,7 @@ * <21:5> :: Reserved * <4:0> :: Port number */ -#define PD_VDO_DFP_HOSTCAP(vdo) (((vdo) & GENMASK(26, 24)) >> 24) +#define PD_VDO_DFP_HOSTCAP(vdo) FIELD_GET(GENMASK(26, 24), vdo) #define DFP_VDO_VER1_1 1 #define HOST_USB2_CAPABLE BIT(0) diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 372898d9eeb0..67bfcda6c7d2 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -194,9 +194,4 @@ struct renesas_usbhs_platform_info { struct renesas_usbhs_driver_param driver_param; }; -/* - * macro for platform - */ -#define renesas_usbhs_get_info(pdev)\ - ((struct renesas_usbhs_platform_info *)(pdev)->dev.platform_data) #endif /* RENESAS_USB_H */ diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 467e8045e9f8..47a86b8a4a50 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -145,6 +145,7 @@ #define TCPC_RX_BYTE_CNT 0x30 #define TCPC_RX_BUF_FRAME_TYPE 0x31 #define TCPC_RX_BUF_FRAME_TYPE_SOP 0 +#define TCPC_RX_BUF_FRAME_TYPE_SOP1 1 #define TCPC_RX_HDR 0x32 #define TCPC_RX_DATA 0x34 /* through 0x4f */ @@ -198,12 +199,23 @@ struct tcpci; * Chip level drivers are expected to check for contaminant and call * tcpm_clean_port when the port is clean to put the port back into * toggling state. + * @cable_comm_capable: + * Optional; Set when TCPC can communicate with cable plugs over SOP' + * @attempt_vconn_swap_discovery: + * Optional; The callback is called by the TCPM when the result of + * a Discover Identity request indicates that the port partner is + * a receptacle capable of modal operation. Chip level TCPCI drivers + * can implement their own policy to determine if and when a Vconn + * swap following Discover Identity on SOP' occurs.
+ * Return true when the TCPM is allowed to request a Vconn swap + * after Discover Identity on SOP. */ struct tcpci_data { struct regmap *regmap; unsigned char TX_BUF_BYTE_x_hidden:1; unsigned char auto_discharge_disconnect:1; unsigned char vbus_vsafe0v:1; + unsigned char cable_comm_capable:1; int (*init)(struct tcpci *tcpci, struct tcpci_data *data); int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data, @@ -215,6 +227,7 @@ struct tcpci_data { void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data, bool capable); void (*check_contaminant)(struct tcpci *tcpci, struct tcpci_data *data); + bool (*attempt_vconn_swap_discovery)(struct tcpci *tcpci, struct tcpci_data *data); }; struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data); diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index 65fac5e1f317..061da9546a81 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -119,6 +119,17 @@ enum tcpm_transmit_type { * at the end of the debounce period or when the port is still * toggling. Chip level drivers are expected to check for contaminant * and call tcpm_clean_port when the port is clean. + * @cable_comm_capable: + * Optional; Returns whether cable communication over SOP' is supported + * by the TCPC + * @attempt_vconn_swap_discovery: + * Optional; The callback is called by the TCPM when the result of + * a Discover Identity request indicates that the port partner is + * a receptacle capable of modal operation. Chip level TCPCI drivers + * can implement their own policy to determine if and when a Vconn + * swap following Discover Identity on SOP' occurs. + * Return true when the TCPM is allowed to request a Vconn swap + * after Discover Identity on SOP. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -133,6 +144,8 @@ struct tcpc_dev { enum typec_cc_status *cc2); int (*set_polarity)(struct tcpc_dev *dev, enum typec_cc_polarity polarity); + int (*set_orientation)(struct tcpc_dev *dev, + enum typec_orientation orientation); int (*set_vconn)(struct tcpc_dev *dev, bool on); int (*set_vbus)(struct tcpc_dev *dev, bool on, bool charge); int (*set_current_limit)(struct tcpc_dev *dev, u32 max_ma, u32 mv); @@ -154,6 +167,8 @@ struct tcpc_dev { bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); void (*set_partner_usb_comm_capable)(struct tcpc_dev *dev, bool enable); void (*check_contaminant)(struct tcpc_dev *dev); + bool (*cable_comm_capable)(struct tcpc_dev *dev); + bool (*attempt_vconn_swap_discovery)(struct tcpc_dev *dev); }; struct tcpm_port; @@ -166,7 +181,8 @@ void tcpm_cc_change(struct tcpm_port *port); void tcpm_sink_frs(struct tcpm_port *port); void tcpm_sourcing_vbus(struct tcpm_port *port); void tcpm_pd_receive(struct tcpm_port *port, - const struct pd_message *msg); + const struct pd_message *msg, + enum tcpm_transmit_type rx_sop_type); void tcpm_pd_transmit_complete(struct tcpm_port *port, enum tcpm_transmit_status status); void tcpm_pd_hard_reset(struct tcpm_port *port); diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h index 46e73584b6e6..e6c14f2b1f9b 100644 --- a/include/linux/usb/tegra_usb_phy.h +++ b/include/linux/usb/tegra_usb_phy.h @@ -7,11 +7,12 @@ #define __TEGRA_USB_PHY_H #include <linux/clk.h> -#include <linux/gpio.h> #include <linux/regmap.h> #include <linux/reset.h> #include <linux/usb/otg.h> +struct gpio_desc; + /* * utmi_pll_config_in_car_module: true if the UTMI PLL configuration registers * should be set up by clk-tegra, false if by the PHY code diff
--git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index a05d6f6f2536..b35b427561ab 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -18,6 +18,7 @@ struct typec_cable; struct typec_plug; struct typec_port; struct typec_altmode_ops; +struct typec_cable_ops; struct fwnode_handle; struct device; @@ -157,6 +158,9 @@ void typec_port_register_altmodes(struct typec_port *port, const struct typec_altmode_ops *ops, void *drvdata, struct typec_altmode **altmodes, size_t n); +void typec_port_register_cable_ops(struct typec_altmode **altmodes, int max_altmodes, + const struct typec_cable_ops *ops); + void typec_unregister_altmode(struct typec_altmode *altmode); struct typec_port *typec_altmode2port(struct typec_altmode *alt); @@ -333,6 +337,9 @@ void typec_partner_set_svdm_version(struct typec_partner *partner, enum usb_pd_svdm_ver svdm_version); int typec_get_negotiated_svdm_version(struct typec_port *port); +int typec_get_cable_svdm_version(struct typec_port *port); +void typec_cable_set_svdm_version(struct typec_cable *cable, enum usb_pd_svdm_ver svdm_version); + struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner, struct usb_power_delivery_desc *desc); diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h index 28aeef8f9e7b..b3c0866ea70f 100644 --- a/include/linux/usb/typec_altmode.h +++ b/include/linux/usb/typec_altmode.h @@ -20,6 +20,7 @@ struct typec_altmode_ops; * @active: Tells has the mode been entered or not * @desc: Optional human readable description of the mode * @ops: Operations vector from the driver + * @cable_ops: Cable operations vector from the driver. */ struct typec_altmode { struct device dev; @@ -30,6 +31,7 @@ struct typec_altmode { char *desc; const struct typec_altmode_ops *ops; + const struct typec_cable_ops *cable_ops; }; #define to_typec_altmode(d) container_of(d, struct typec_altmode, dev) @@ -75,6 +77,34 @@ int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf, const struct typec_altmode * typec_altmode_get_partner(struct typec_altmode *altmode); +/** + * struct typec_cable_ops - Cable alternate mode operations vector + * @enter: Operations to be executed with Enter Mode Command + * @exit: Operations to be executed with Exit Mode Command + * @vdm: Callback for SVID specific commands + */ +struct typec_cable_ops { + int (*enter)(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo); + int (*exit)(struct typec_altmode *altmode, enum typec_plug_index sop); + int (*vdm)(struct typec_altmode *altmode, enum typec_plug_index sop, + const u32 hdr, const u32 *vdo, int cnt); +}; + +int typec_cable_altmode_enter(struct typec_altmode *altmode, enum typec_plug_index sop, u32 *vdo); +int typec_cable_altmode_exit(struct typec_altmode *altmode, enum typec_plug_index sop); +int typec_cable_altmode_vdm(struct typec_altmode *altmode, enum typec_plug_index sop, + const u32 header, const u32 *vdo, int count); + +/** + * typec_altmode_get_cable_svdm_version - Get negotiated SVDM version for cable plug + * @altmode: Handle to the alternate mode + */ +static inline int +typec_altmode_get_cable_svdm_version(struct typec_altmode *altmode) +{ + return typec_get_cable_svdm_version(typec_altmode2port(altmode)); +} + /* * These are the connector states (USB, Safe and Alt Mode) defined in USB Type-C * Specification. 
SVID specific connector states are expected to follow and diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index 1f358098522d..f2da264d9c14 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -3,6 +3,7 @@ #define __USB_TYPEC_DP_H #include <linux/usb/typec_altmode.h> +#include <linux/bitfield.h> #define USB_TYPEC_DP_SID 0xff01 /* USB IF has not assigned a Standard ID (SID) for VirtualLink, @@ -67,21 +68,21 @@ enum { #define DP_CAP_UFP_D 1 #define DP_CAP_DFP_D 2 #define DP_CAP_DFP_D_AND_UFP_D 3 -#define DP_CAP_DP_SIGNALLING(_cap_) (((_cap_) & GENMASK(5, 2)) >> 2) +#define DP_CAP_DP_SIGNALLING(_cap_) FIELD_GET(GENMASK(5, 2), _cap_) #define DP_CAP_SIGNALLING_HBR3 1 #define DP_CAP_SIGNALLING_UHBR10 2 #define DP_CAP_SIGNALLING_UHBR20 3 #define DP_CAP_RECEPTACLE BIT(6) #define DP_CAP_USB BIT(7) -#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(15, 8)) >> 8) -#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) (((_cap_) & GENMASK(23, 16)) >> 16) +#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(15, 8), _cap_) +#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_) FIELD_GET(GENMASK(23, 16), _cap_) /* Get pin assignment taking plug & receptacle into consideration */ #define DP_CAP_PIN_ASSIGN_UFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_UFP_D_PIN_ASSIGN(_cap_) : DP_CAP_DFP_D_PIN_ASSIGN(_cap_)) #define DP_CAP_PIN_ASSIGN_DFP_D(_cap_) ((_cap_ & DP_CAP_RECEPTACLE) ? \ DP_CAP_DFP_D_PIN_ASSIGN(_cap_) : DP_CAP_UFP_D_PIN_ASSIGN(_cap_)) #define DP_CAP_UHBR_13_5_SUPPORT BIT(26) -#define DP_CAP_CABLE_TYPE(_cap_) (((_cap_) & GENMASK(29, 28)) >> 28) +#define DP_CAP_CABLE_TYPE(_cap_) FIELD_GET(GENMASK(29, 28), _cap_) #define DP_CAP_CABLE_TYPE_PASSIVE 0 #define DP_CAP_CABLE_TYPE_RE_TIMER 1 #define DP_CAP_CABLE_TYPE_RE_DRIVER 2 @@ -116,7 +117,7 @@ enum { /* Helper for setting/getting the pin assignment value to the configuration */ #define DP_CONF_SET_PIN_ASSIGN(_a_) ((_a_) << 8) -#define DP_CONF_GET_PIN_ASSIGN(_conf_) (((_conf_) & GENMASK(15, 8)) >> 8) +#define DP_CONF_GET_PIN_ASSIGN(_conf_) FIELD_GET(GENMASK(15, 8), _conf_) #define DP_CONF_UHBR13_5_SUPPORT BIT(26) #define DP_CONF_CABLE_TYPE_MASK GENMASK(29, 28) #define DP_CONF_CABLE_TYPE_SHIFT 28 diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index c7a2153bd6f5..fa97d7e00f5c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -3,6 +3,7 @@ #define __USB_TYPEC_TBT_H #include <linux/usb/typec_altmode.h> +#include <linux/bitfield.h> #define USB_TYPEC_VENDOR_INTEL 0x8087 /* Alias for convenience */ @@ -25,7 +26,7 @@ struct typec_thunderbolt_data { /* TBT3 Device Discover Mode VDO bits */ #define TBT_MODE BIT(0) -#define TBT_ADAPTER(_vdo_) (((_vdo_) & BIT(16)) >> 16) +#define TBT_ADAPTER(_vdo_) FIELD_GET(BIT(16), _vdo_) #define TBT_ADAPTER_LEGACY 0 #define TBT_ADAPTER_TBT3 1 #define TBT_INTEL_SPECIFIC_B0 BIT(26) @@ -35,12 +36,12 @@ struct typec_thunderbolt_data { #define TBT_SET_ADAPTER(a) (((a) & 1) << 16) /* TBT3 Cable Discover Mode VDO bits */ -#define TBT_CABLE_SPEED(_vdo_) (((_vdo_) & GENMASK(18, 16)) >> 16) +#define TBT_CABLE_SPEED(_vdo_) FIELD_GET(GENMASK(18, 16), _vdo_) #define TBT_CABLE_USB3_GEN1 1 #define TBT_CABLE_USB3_PASSIVE 2 #define TBT_CABLE_10_AND_20GBPS 3 -#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) \ - (((_vdo_) & GENMASK(20, 19)) >> 19) +#define TBT_CABLE_ROUNDED_SUPPORT(_vdo_) FIELD_GET(GENMASK(20, 19), _vdo_) + #define TBT_GEN3_NON_ROUNDED 0 #define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 #define TBT_CABLE_OPTICAL BIT(21) diff --git 
a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index e4056547fbe6..05d59f74fc88 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -36,6 +36,52 @@ #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS) +/* + * Start with fault_pending_wqh and fault_wqh so they're more likely + * to be in the same cacheline. + * + * Locking order: + * fd_wqh.lock + * fault_pending_wqh.lock + * fault_wqh.lock + * event_wqh.lock + * + * To avoid deadlocks, IRQs must be disabled when taking any of the above locks, + * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's + * also taken in IRQ context. + */ +struct userfaultfd_ctx { + /* waitqueue head for the pending (i.e. not read) userfaults */ + wait_queue_head_t fault_pending_wqh; + /* waitqueue head for the userfaults */ + wait_queue_head_t fault_wqh; + /* waitqueue head for the pseudo fd to wakeup poll/read */ + wait_queue_head_t fd_wqh; + /* waitqueue head for events */ + wait_queue_head_t event_wqh; + /* a refile sequence protected by fault_pending_wqh lock */ + seqcount_spinlock_t refile_seq; + /* pseudo fd refcounting */ + refcount_t refcount; + /* userfaultfd syscall flags */ + unsigned int flags; + /* features requested from the userspace */ + unsigned int features; + /* released */ + bool released; + /* + * Prevents userfaultfd operations (fill/move/wp) from happening while + * some non-cooperative event(s) is taking place. Increments are done + * in write-mode. Whereas userfaultfd operations, which include + * reading mmap_changing, are done under read-mode. + */ + struct rw_semaphore map_changing_lock; + /* memory mappings are changing because of non-cooperative event */ + atomic_t mmap_changing; + /* mm with one or more vmas attached to this userfaultfd_ctx */ + struct mm_struct *mm; +}; + extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); /* A combined operation mode + behavior flags.
*/ @@ -74,31 +120,26 @@ extern int mfill_atomic_install_pte(pmd_t *dst_pmd, unsigned long dst_addr, struct page *page, bool newly_allocated, uffd_flags_t flags); -extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start, +extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start, unsigned long src_start, unsigned long len, - atomic_t *mmap_changing, uffd_flags_t flags); -extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, + uffd_flags_t flags); +extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx, unsigned long dst_start, - unsigned long len, - atomic_t *mmap_changing); -extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start, - unsigned long len, atomic_t *mmap_changing, - uffd_flags_t flags); -extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start, - unsigned long len, atomic_t *mmap_changing, - uffd_flags_t flags); -extern int mwriteprotect_range(struct mm_struct *dst_mm, - unsigned long start, unsigned long len, - bool enable_wp, atomic_t *mmap_changing); + unsigned long len); +extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start, + unsigned long len, uffd_flags_t flags); +extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start, + unsigned long len, uffd_flags_t flags); +extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start, + unsigned long len, bool enable_wp); extern long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); /* move_pages */ void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2); void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2); -ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm, - unsigned long dst_start, unsigned long src_start, - unsigned long len, __u64 flags); +ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, + unsigned long src_start, unsigned long len, __u64 flags); int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index db15ac07f8a6..7977ca03ac7a 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -7,6 +7,7 @@ #include <linux/interrupt.h> #include <linux/vhost_iotlb.h> #include <linux/virtio_net.h> +#include <linux/virtio_blk.h> #include <linux/if_ether.h> /** @@ -195,6 +196,10 @@ struct vdpa_map_file { * @idx: virtqueue index * Returns int: irq number of a virtqueue, * negative number if no irq assigned. 
+ * @get_vq_size: Get the size of a specific virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Return u16: the size of the virtqueue * @get_vq_align: Get the virtqueue align requirement * for the device * @vdev: vdpa device @@ -386,6 +391,7 @@ struct vdpa_config_ops { (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* vq irq is not expected to be changed once DRIVER_OK is set */ int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); + u16 (*get_vq_size)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 89b265bc6ec3..8b1a29820409 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -356,6 +356,7 @@ struct virqfd { wait_queue_entry_t wait; poll_table pt; struct work_struct shutdown; + struct work_struct flush_inject; struct virqfd **pvirqfd; }; @@ -363,5 +364,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *), void (*thread)(void *, void *), void *data, struct virqfd **pvirqfd, int fd); void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); #endif /* VFIO_H */ diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 85e84b92751b..a2c8b8bba711 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -130,7 +130,15 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); - +ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, + void __iomem *io, char __user *buf, + loff_t off, size_t count, size_t x_start, + size_t x_end, bool iswrite); +bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, + loff_t reg_start, size_t reg_cnt, + loff_t *buf_offset, + size_t *intersect_count, + size_t *register_offset); #define VFIO_IOWRITE_DECLATION(size) \ int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size val, void __iomem *io); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b0201747a263..96fea920873b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -126,6 +126,8 @@ struct virtio_admin_cmd { * @vqs: the list of virtqueues for this device. * @features: the features supported by both driver and device. * @priv: private pointer for the driver's use. + * @debugfs_dir: debugfs directory entry. + * @debugfs_filter_features: features to be filtered set by debugfs. */ struct virtio_device { int index; @@ -141,6 +143,10 @@ struct virtio_device { struct list_head vqs; u64 features; void *priv; +#ifdef CONFIG_VIRTIO_DEBUG + struct dentry *debugfs_dir; + u64 debugfs_filter_features; +#endif }; #define dev_to_virtio(_dev) container_of_const(_dev, struct virtio_device, dev) @@ -170,7 +176,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); /** * struct virtio_driver - operations for a virtio I/O driver - * @driver: underlying device driver (populate name and owner). + * @driver: underlying device driver (populate name). * @id_table: the ids serviced by this driver. * @feature_table: an array of feature numbers supported by this driver. * @feature_table_size: number of entries in the feature table array. 
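The newly exported vfio_pci_core_do_io_rw() and vfio_pci_core_range_intersect_range() above factor out a pattern common to region read/write emulation: clipping a user I/O request against an emulated register window. The implementation is not part of this header diff, so the following is only a userspace sketch of the contract suggested by the parameter names (buf_offset, intersect_count, register_offset); the semantics shown here are an assumption from the signature, not the kernel code, and loff_t is replaced by a local typedef:

```c
/*
 * Illustrative sketch (assumed contract, not the kernel implementation):
 * intersect a user buffer [buf_start, buf_start + buf_cnt) with a device
 * register window [reg_start, reg_start + reg_cnt), reporting where the
 * overlap begins in each range and how long it is.
 */
#include <stdbool.h>
#include <stdio.h>

typedef long long loff_t_;	/* stand-in for the kernel's loff_t */

static bool range_intersect_range(loff_t_ buf_start, size_t buf_cnt,
				  loff_t_ reg_start, size_t reg_cnt,
				  loff_t_ *buf_offset,
				  size_t *intersect_count,
				  size_t *register_offset)
{
	loff_t_ start = buf_start > reg_start ? buf_start : reg_start;
	loff_t_ buf_end = buf_start + (loff_t_)buf_cnt;
	loff_t_ reg_end = reg_start + (loff_t_)reg_cnt;
	loff_t_ end = buf_end < reg_end ? buf_end : reg_end;

	if (start >= end)
		return false;	/* no overlap */

	*buf_offset = start - buf_start;
	*register_offset = (size_t)(start - reg_start);
	*intersect_count = (size_t)(end - start);
	return true;
}

int main(void)
{
	loff_t_ boff;
	size_t cnt, roff;

	/* an 8-byte access at 0x10 vs. a 4-byte register at 0x14 */
	if (range_intersect_range(0x10, 8, 0x14, 4, &boff, &cnt, &roff))
		printf("buf+%lld, reg+%zu, %zu bytes\n", boff, roff, cnt);
	return 0;
}
```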
@@ -208,7 +214,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) return container_of(drv, struct virtio_driver, driver); } -int register_virtio_driver(struct virtio_driver *drv); +/* use a macro to avoid include chaining to get THIS_MODULE */ +#define register_virtio_driver(drv) \ + __register_virtio_driver(drv, THIS_MODULE) +int __register_virtio_driver(struct virtio_driver *drv, struct module *owner); void unregister_virtio_driver(struct virtio_driver *drv); /* module_virtio_driver() - Helper macro for drivers that don't do @@ -234,4 +243,33 @@ void virtqueue_dma_sync_single_range_for_cpu(struct virtqueue *_vq, dma_addr_t a void virtqueue_dma_sync_single_range_for_device(struct virtqueue *_vq, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir); + +#ifdef CONFIG_VIRTIO_DEBUG +void virtio_debug_device_init(struct virtio_device *dev); +void virtio_debug_device_exit(struct virtio_device *dev); +void virtio_debug_device_filter_features(struct virtio_device *dev); +void virtio_debug_init(void); +void virtio_debug_exit(void); +#else +static inline void virtio_debug_device_init(struct virtio_device *dev) +{ +} + +static inline void virtio_debug_device_exit(struct virtio_device *dev) +{ +} + +static inline void virtio_debug_device_filter_features(struct virtio_device *dev) +{ +} + +static inline void virtio_debug_init(void) +{ +} + +static inline void virtio_debug_exit(void) +{ +} +#endif + #endif /* _LINUX_VIRTIO_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index c720be70c8dd..e4a631ec430b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -2,6 +2,8 @@ #ifndef _LINUX_VMALLOC_H #define _LINUX_VMALLOC_H +#include <linux/alloc_tag.h> +#include <linux/sched.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/list.h> @@ -35,6 +37,7 @@ struct iov_iter; /* in uio.h */ #else #define VM_DEFER_KMEMLEAK 0 #endif +#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ @@ -137,26 +140,54 @@ extern unsigned long vmalloc_nr_pages(void); static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif -extern void *vmalloc(unsigned long size) __alloc_size(1); -extern void *vzalloc(unsigned long size) __alloc_size(1); -extern void *vmalloc_user(unsigned long size) __alloc_size(1); -extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1); -extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1); -extern void *vmalloc_32(unsigned long size) __alloc_size(1); -extern void *vmalloc_32_user(unsigned long size) __alloc_size(1); -extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -extern void *__vmalloc_node_range(unsigned long size, unsigned long align, +extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); +#define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) + +extern void *vzalloc_noprof(unsigned long size) __alloc_size(1); +#define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__)) + +extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1); +#define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__)) + +extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1); +#define vmalloc_node(...) alloc_hooks(vmalloc_node_noprof(__VA_ARGS__)) + +extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1); +#define vzalloc_node(...) 
alloc_hooks(vzalloc_node_noprof(__VA_ARGS__)) + +extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1); +#define vmalloc_32(...) alloc_hooks(vmalloc_32_noprof(__VA_ARGS__)) + +extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1); +#define vmalloc_32_user(...) alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__)) + +extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); +#define __vmalloc(...) alloc_hooks(__vmalloc_noprof(__VA_ARGS__)) + +extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) __alloc_size(1); -void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, +#define __vmalloc_node_range(...) alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__)) + +void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); -void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1); +#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) + +void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); +#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) + +extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) + +extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2); +#define vmalloc_array(...) alloc_hooks(vmalloc_array_noprof(__VA_ARGS__)) + +extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); +#define __vcalloc(...) alloc_hooks(__vcalloc_noprof(__VA_ARGS__)) -extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); -extern void *vmalloc_array(size_t n, size_t size) __alloc_size(1, 2); -extern void *__vcalloc(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); -extern void *vcalloc(size_t n, size_t size) __alloc_size(1, 2); +extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); +#define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); @@ -232,6 +263,10 @@ static inline bool is_vm_area_hugepages(const void *addr) } #ifdef CONFIG_MMU +int vm_area_map_pages(struct vm_struct *area, unsigned long start, + unsigned long end, struct page **pages); +void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, + unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { @@ -253,7 +288,6 @@ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. 
*/ -extern struct list_head vmap_area_list; extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h new file mode 100644 index 000000000000..e1dec1a6a749 --- /dev/null +++ b/include/linux/vmcore_info.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VMCORE_INFO_H +#define LINUX_VMCORE_INFO_H + +#include <linux/linkage.h> +#include <linux/elfcore.h> +#include <linux/elf.h> + +#define CRASH_CORE_NOTE_NAME "CORE" +#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) + +/* + * The per-cpu notes area is a list of notes terminated by a "NULL" + * note header. For kdump, the code in vmcore.c runs in the context + * of the second kernel to combine them into one note. + */ +#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + CRASH_CORE_NOTE_NAME_BYTES + \ + CRASH_CORE_NOTE_DESC_BYTES) + +#define VMCOREINFO_BYTES PAGE_SIZE +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" +#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + VMCOREINFO_NOTE_NAME_BYTES + \ + VMCOREINFO_BYTES) + +typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; +/* Per cpu memory for storing cpu states in case of system crash. */ +extern note_buf_t __percpu *crash_notes; + +void crash_update_vmcoreinfo_safecopy(void *ptr); +void crash_save_vmcoreinfo(void); +void arch_crash_save_vmcoreinfo(void); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); +phys_addr_t paddr_vmcoreinfo_note(void); + +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_BUILD_ID() \ + ({ \ + static_assert(sizeof(vmlinux_build_id) == 20); \ + vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \ + }) + +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) +#define VMCOREINFO_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(name)) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_TYPE_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(name, field)) +#define VMCOREINFO_LENGTH(name, value) \ + vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) +#define VMCOREINFO_NUMBER(name) \ + vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) +#define VMCOREINFO_CONFIG(name) \ + vmcoreinfo_append_str("CONFIG_%s=y\n", #name) + +extern unsigned char *vmcoreinfo_data; +extern size_t vmcoreinfo_size; +extern u32 *vmcoreinfo_note; + +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len); +void final_note(Elf_Word *buf); +#endif /* LINUX_VMCORE_INFO_H */ diff --git a/include/linux/vmstat.h 
b/include/linux/vmstat.h index 343906a98d6e..735eae6e272c 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -487,14 +487,6 @@ static inline void node_stat_sub_folio(struct folio *folio, mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio)); } -static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages, - int migratetype) -{ - __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); - if (is_migrate_cma(migratetype)) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); -} - extern const char * const vmstat_text[]; static inline const char *zone_stat_name(enum zone_stat_item item) diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h index c1f5aebef170..d008c3d0a9bb 100644 --- a/include/linux/vt_kern.h +++ b/include/linux/vt_kern.h @@ -25,7 +25,8 @@ extern int fg_console, last_console, want_console; int vc_allocate(unsigned int console); int vc_cons_allocated(unsigned int console); -int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines); +int __vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines, + bool from_user); struct vc_data *vc_deallocate(unsigned int console); void reset_palette(struct vc_data *vc); void do_blank_screen(int entering_gfx); @@ -42,6 +43,12 @@ void redraw_screen(struct vc_data *vc, int is_switch); #define update_screen(x) redraw_screen(x, 0) #define switch_screen(x) redraw_screen(x, 1) +static inline int vc_resize(struct vc_data *vc, unsigned int cols, + unsigned int lines) +{ + return __vc_resize(vc, cols, lines, false); +} + struct tty_struct; int tioclinux(struct tty_struct *tty, unsigned long arg); @@ -168,7 +175,4 @@ void vt_set_led_state(unsigned int console, int leds); void vt_kbd_con_start(unsigned int console); void vt_kbd_con_stop(unsigned int console); -void vc_scrolldelta_helper(struct vc_data *c, int lines, - unsigned int rolled_over, void *_base, unsigned int size); - #endif /* _VT_KERN_H */ diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 3684487d01e1..29dd5b91dd7d 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -5,10 +5,6 @@ #include <linux/context_tracking_state.h> #include <linux/sched.h> -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include <asm/vtime.h> -#endif - /* * Common vtime APIs */ @@ -18,7 +14,6 @@ extern void vtime_account_idle(struct task_struct *tsk); #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern void arch_vtime_task_switch(struct task_struct *tsk); extern void vtime_user_enter(struct task_struct *tsk); extern void vtime_user_exit(struct task_struct *tsk); extern void vtime_guest_enter(struct task_struct *tsk); diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h index 4ca2842d2842..6a5bb052fcc2 100644 --- a/include/linux/win_minmax.h +++ b/include/linux/win_minmax.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** - * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. +/* + * win_minmax.h: windowed min/max tracker by Kathleen Nichols. 
* */ #ifndef MINMAX_H diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 686291b87852..63cca3b58d6d 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -48,7 +48,8 @@ u8 wmidev_instance_count(struct wmi_device *wdev); * struct wmi_driver - WMI driver structure * @driver: Driver model structure * @id_table: List of WMI GUIDs supported by this driver - * @no_notify_data: WMI events provide no event data + * @no_notify_data: Driver supports WMI events which provide no event data + * @no_singleton: Driver can be instantiated multiple times * @probe: Callback for device binding * @remove: Callback for device unbinding * @notify: Callback for receiving WMI events @@ -59,6 +60,7 @@ struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; bool no_notify_data; + bool no_singleton; int (*probe)(struct wmi_device *wdev, const void *context); void (*remove)(struct wmi_device *wdev); diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h new file mode 100644 index 000000000000..4ca1ba66d2f0 --- /dev/null +++ b/include/linux/wordpart.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_WORDPART_H +#define _LINUX_WORDPART_H + +/** + * upper_32_bits - return bits 32-63 of a number + * @n: the number we're accessing + * + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress + * the "right shift count >= width of type" warning when that quantity is + * 32-bits. + */ +#define upper_32_bits(n) ((u32)(((n) >> 16) >> 16)) + +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) + +/** + * upper_16_bits - return bits 16-31 of a number + * @n: the number we're accessing + */ +#define upper_16_bits(n) ((u16)((n) >> 16)) + +/** + * lower_16_bits - return bits 0-15 of a number + * @n: the number we're accessing + */ +#define lower_16_bits(n) ((u16)((n) & 0xffff)) + +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. 
+ */ +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +/* Set bits in the first 'n' bytes when loaded from memory */ +#ifdef __LITTLE_ENDIAN +# define aligned_byte_mask(n) ((1UL << 8*(n))-1) +#else +# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n))) +#endif + +#endif // _LINUX_WORDPART_H diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2cc0a9606175..fb3993894536 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -22,20 +22,57 @@ */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) -enum { +enum work_bits { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_INACTIVE_BIT= 1, /* work item is inactive */ - WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ - WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ + WORK_STRUCT_INACTIVE_BIT, /* work item is inactive */ + WORK_STRUCT_PWQ_BIT, /* data points to pwq */ + WORK_STRUCT_LINKED_BIT, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ -#else - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT, /* static initializer (debugobjects) */ #endif + WORK_STRUCT_FLAG_BITS, + /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_STRUCT_COLOR_BITS = 4, + /* + * When WORK_STRUCT_PWQ is set, reserve 8 bits off of pwq pointer w/ + * debugobjects turned off. This makes pwqs aligned to 256 bytes (512 + * bytes w/ DEBUG_OBJECTS_WORK) and allows 16 workqueue flush colors. + * + * MSB + * [ pwq pointer ] [ flush color ] [ STRUCT flags ] + * 4 bits 4 or 5 bits + */ + WORK_STRUCT_PWQ_SHIFT = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, + + /* + * data contains off-queue information when !WORK_STRUCT_PWQ. + * + * MSB + * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ] + * 16 bits 1 bit 4 or 5 bits + */ + WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT, + WORK_OFFQ_FLAG_END, + WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, + + WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_DISABLE_BITS = 16, + + /* + * When a work item is off queue, the high bits encode off-queue flags + * and the last pool it was on. Cap pool ID to 31 bits and use the + * highest number to indicate that no pool is associated. + */ + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, + WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, +}; + +enum work_flags { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_INACTIVE = 1 << WORK_STRUCT_INACTIVE_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, @@ -45,35 +82,14 @@ enum { #else WORK_STRUCT_STATIC = 0, #endif +}; +enum wq_misc_consts { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS), /* not bound to any CPU, prefer the local CPU */ WORK_CPU_UNBOUND = NR_CPUS, - /* - * Reserve 8 bits off of pwq pointer w/ debugobjects turned off. - * This makes pwqs aligned to 256 bytes and allows 16 workqueue - * flush colors. 
- */ - WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + - WORK_STRUCT_COLOR_BITS, - - /* data contains off-queue information when !WORK_STRUCT_PWQ */ - WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - - __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - - /* - * When a work item is off queue, its high bits point to the last - * pool it was on. Cap at 31 bits and use the highest number to - * indicate that no pool is associated. - */ - WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, - WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, - WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_RUNNING = 1 << 1, @@ -83,12 +99,12 @@ enum { }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT) +#define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT) +#define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) - -#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) -#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) +#define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) #define WORK_DATA_INIT() ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ @@ -169,6 +185,9 @@ struct workqueue_attrs { * Below fields aren't properties of a worker_pool. They only modify how * :c:func:`apply_workqueue_attrs` selects pools and thus don't * participate in pool hash calculations or equality comparisons. + * + * If @affn_strict is set, @cpumask isn't a property of a worker_pool + * either. */ /** @@ -347,7 +366,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * Workqueue flags and constants. For details, please refer to * Documentation/core-api/workqueue.rst. */ -enum { +enum wq_flags { + WQ_BH = 1 << 0, /* execute in bottom half (softirq) context */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ @@ -386,11 +406,22 @@ enum { __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ - __WQ_ORDERED_EXPLICIT = 1 << 19, /* internal: alloc_ordered_workqueue() */ + /* BH wq only allows the following flags */ + __WQ_BH_ALLOWS = WQ_BH | WQ_HIGHPRI, +}; + +enum wq_consts { WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, + + /* + * Per-node default cap on min_active. Unless explicitly set, min_active + * is set to min(max_active, WQ_DFL_MIN_ACTIVE). For more details, see + * workqueue_struct->min_active definition. + */ + WQ_DFL_MIN_ACTIVE = 8, }; /* @@ -420,6 +451,9 @@ enum { * they are the same as their non-power-efficient counterparts - e.g. * system_power_efficient_wq is identical to system_wq if * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. + * + * system_bh[_highpri]_wq are convenience interfaces to softirq. BH work items + * are executed in the queueing CPU's BH context in the queueing order.
*/ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_highpri_wq; @@ -428,16 +462,43 @@ extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq; +extern struct workqueue_struct *system_bh_wq; +extern struct workqueue_struct *system_bh_highpri_wq; + +void workqueue_softirq_action(bool highpri); +void workqueue_softirq_dead(unsigned int cpu); /** * alloc_workqueue - allocate a workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags - * @max_active: max in-flight work items per CPU, 0 for default - * remaining args: args for @fmt + * @max_active: max in-flight work items, 0 for default + * @...: args for @fmt + * + * For a per-cpu workqueue, @max_active limits the number of in-flight work + * items for each CPU. e.g. @max_active of 1 indicates that each CPU can be + * executing at most one work item for the workqueue. + * + * For unbound workqueues, @max_active limits the number of in-flight work items + * for the whole system. e.g. @max_active of 16 indicates that there can be + * at most 16 work items executing for the workqueue in the whole system. + * + * As sharing the same active counter for an unbound workqueue across multiple + * NUMA nodes can be expensive, @max_active is distributed to each NUMA node + * according to the proportion of the number of online CPUs and enforced + * independently. * - * Allocate a workqueue with the specified parameters. For detailed - * information on WQ_* flags, please refer to + * Depending on online CPU distribution, a node may end up with per-node + * max_active which is significantly lower than @max_active, which can lead to + * deadlocks if the per-node concurrency limit is lower than the maximum number + * of interdependent work items for the workqueue. + * + * To guarantee forward progress regardless of online CPU distribution, the + * concurrency limit on every node is guaranteed to be equal to or greater than + * min_active which is set to min(@max_active, %WQ_DFL_MIN_ACTIVE). This means + * that the sum of per-node max_active's may be larger than @max_active. + * + * For detailed information on %WQ_* flags, please refer to * Documentation/core-api/workqueue.rst. * * RETURNS: @@ -460,8 +521,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); * Pointer to the allocated workqueue on success, %NULL on failure. */ #define alloc_ordered_workqueue(fmt, flags, args...)
\ - alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | \ - __WQ_ORDERED_EXPLICIT | (flags), 1, ##args) + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) #define create_workqueue(name) \ alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) @@ -471,6 +531,9 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); #define create_singlethread_workqueue(name) \ alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) +#define from_work(var, callback_work, work_fieldname) \ + container_of(callback_work, typeof(*var), work_fieldname) + extern void destroy_workqueue(struct workqueue_struct *wq); struct workqueue_attrs *alloc_workqueue_attrs(void); @@ -504,10 +567,20 @@ extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); +extern bool disable_work(struct work_struct *work); +extern bool disable_work_sync(struct work_struct *work); +extern bool enable_work(struct work_struct *work); + +extern bool disable_delayed_work(struct delayed_work *dwork); +extern bool disable_delayed_work_sync(struct delayed_work *dwork); +extern bool enable_delayed_work(struct delayed_work *dwork); + extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); +extern void workqueue_set_min_active(struct workqueue_struct *wq, + int min_active); extern struct work_struct *current_work(void); extern bool current_is_workqueue_rescuer(void); extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); @@ -609,6 +682,32 @@ static inline bool schedule_work(struct work_struct *work) return queue_work(system_wq, work); } +/** + * enable_and_queue_work - Enable and queue a work item on a specific workqueue + * @wq: The target workqueue + * @work: The work item to be enabled and queued + * + * This function combines the operations of enable_work() and queue_work(), + * providing a convenient way to enable and queue a work item in a single call. + * It invokes enable_work() on @work and then queues it if the disable depth + * reached 0. Returns %true if the disable depth reached 0 and @work is queued, + * and %false otherwise. + * + * Note that @work is always queued when disable depth reaches zero. If the + * desired behavior is queueing only if certain events took place while @work is + * disabled, the user should implement the necessary state tracking and perform + * explicit conditional queueing after enable_work(). + */ +static inline bool enable_and_queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + if (enable_work(work)) { + queue_work(wq, work); + return true; + } + return false; +} + /* * Detect attempt to flush system-wide workqueues at compile time when possible. * Warn attempt to flush system-wide workqueues at runtime. diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 453736fd1d23..112d806ddbe4 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,6 +11,7 @@ #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> +#include <linux/pagevec.h> struct bio; @@ -40,6 +41,7 @@ enum writeback_sync_modes { * in a manner such that unspecified fields are set to zero. 
*/ struct writeback_control { + /* public fields that can be set and/or consumed by the caller: */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ @@ -77,6 +79,11 @@ struct writeback_control { */ struct swap_iocb **swap_plug; + /* internal fields used by the ->writepages implementation: */ + struct folio_batch fbatch; + pgoff_t index; + int saved_err; + #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ @@ -348,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); +unsigned long cgwb_calc_thresh(struct bdi_writeback *wb); void wb_update_bandwidth(struct bdi_writeback *wb); @@ -360,11 +368,12 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool wb_over_bg_thresh(struct bdi_writeback *wb); +struct folio *writeback_iter(struct address_space *mapping, + struct writeback_control *wbc, struct folio *folio, int *error); + typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); -void tag_pages_for_writeback(struct address_space *mapping, - pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 01fa15506286..170fdee6339c 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -16,6 +16,7 @@ * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems + * @WWAN_PORT_FASTBOOT: Fastboot protocol control * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -28,6 +29,7 @@ enum wwan_port_type { WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, WWAN_PORT_XMMRPC, + WWAN_PORT_FASTBOOT, /* Add new port types above this line */ diff --git a/include/linux/xarray.h b/include/linux/xarray.h index cb571dfcf4b1..0b618ec04115 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -12,14 +12,18 @@ #include <linux/bitmap.h> #include <linux/bug.h> #include <linux/compiler.h> +#include <linux/err.h> #include <linux/gfp.h> #include <linux/kconfig.h> -#include <linux/kernel.h> +#include <linux/limits.h> +#include <linux/lockdep.h> #include <linux/rcupdate.h> #include <linux/sched/mm.h> #include <linux/spinlock.h> #include <linux/types.h> +struct list_lru; + /* * The bottom two bits of the entry determine how the XArray interprets * the contents: @@ -1141,12 +1145,12 @@ static inline void xa_release(struct xarray *xa, unsigned long index) * doubled the number of slots per node, we'd get only 3 nodes per 4kB page. */ #ifndef XA_CHUNK_SHIFT -#define XA_CHUNK_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#define XA_CHUNK_SHIFT (IS_ENABLED(CONFIG_BASE_SMALL) ? 
4 : 6) #endif #define XA_CHUNK_SIZE (1UL << XA_CHUNK_SHIFT) #define XA_CHUNK_MASK (XA_CHUNK_SIZE - 1) #define XA_MAX_MARKS 3 -#define XA_MARK_LONGS DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG) +#define XA_MARK_LONGS BITS_TO_LONGS(XA_CHUNK_SIZE) /* * @count is the count of every non-NULL element in the ->slots array @@ -1548,6 +1552,7 @@ void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); +int xas_get_order(struct xa_state *xas); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else @@ -1556,6 +1561,11 @@ static inline int xa_get_order(struct xarray *xa, unsigned long index) return 0; } +static inline int xas_get_order(struct xa_state *xas) +{ + return 0; +} + static inline void xas_split(struct xa_state *xas, void *entry, unsigned int order) { diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 3296438eec06..a67d62b79698 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -53,7 +53,7 @@ void *zpool_map_handle(struct zpool *pool, unsigned long handle, void zpool_unmap_handle(struct zpool *pool, unsigned long handle); -u64 zpool_get_total_size(struct zpool *pool); +u64 zpool_get_total_pages(struct zpool *pool); /** @@ -91,7 +91,7 @@ struct zpool_driver { enum zpool_mapmode mm); void (*unmap)(void *pool, unsigned long handle); - u64 (*total_size)(void *pool); + u64 (*total_pages)(void *pool); }; void zpool_register_driver(struct zpool_driver *driver); diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 0b709f5bc65f..2a85b941db97 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -7,7 +7,6 @@ struct lruvec; -extern u64 zswap_pool_total_size; extern atomic_t zswap_stored_pages; #ifdef CONFIG_ZSWAP @@ -27,10 +26,11 @@ struct zswap_lruvec_state { atomic_long_t nr_zswap_protected; }; +unsigned long zswap_total_pages(void); bool zswap_store(struct folio *folio); bool zswap_load(struct folio *folio); -void zswap_invalidate(int type, pgoff_t offset); -void zswap_swapon(int type); +void zswap_invalidate(swp_entry_t swp); +int zswap_swapon(int type, unsigned long nr_pages); void zswap_swapoff(int type); void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); void zswap_lruvec_state_init(struct lruvec *lruvec); @@ -50,8 +50,11 @@ static inline bool zswap_load(struct folio *folio) return false; } -static inline void zswap_invalidate(int type, pgoff_t offset) {} -static inline void zswap_swapon(int type) {} +static inline void zswap_invalidate(swp_entry_t swp) {} +static inline int zswap_swapon(int type, unsigned long nr_pages) +{ + return 0; +} static inline void zswap_swapoff(int type) {} static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {} static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
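A conversion that recurs throughout this batch (pd_vdo.h, typec_dp.h and typec_tbt.h above) is replacing open-coded mask-and-shift with FIELD_GET(). For a contiguous mask the two forms are equivalent; the standalone sketch below checks that, using deliberately simplified re-implementations of GENMASK() and FIELD_GET() (the kernel versions in <linux/bits.h> and <linux/bitfield.h> add compile-time sanity checks):

```c
/*
 * Demonstrates the GENMASK + FIELD_GET equivalence behind the pd_vdo.h,
 * typec_dp.h and typec_tbt.h hunks above. GENMASK/FIELD_GET are
 * simplified userspace stand-ins, not the kernel definitions.
 */
#include <stdio.h>
#include <assert.h>

#define GENMASK(h, l) \
	(((~0UL) << (l)) & (~0UL >> (sizeof(unsigned long) * 8 - 1 - (h))))

/* divide by the mask's lowest set bit == shift right by the field offset */
#define FIELD_GET(mask, reg) (((reg) & (mask)) / ((mask) & ~((mask) - 1)))

/* mirrors the pd_vdo.h definition above */
#define PD_VDO_UFP_DEVCAP(vdo) FIELD_GET(GENMASK(27, 24), vdo)

int main(void)
{
	unsigned long vdo = 0x0A000000UL;	/* bits 27:24 = 0xa */

	/* the old open-coded form and the FIELD_GET form must agree */
	assert(((vdo & GENMASK(27, 24)) >> 24) == PD_VDO_UFP_DEVCAP(vdo));
	printf("devcap = 0x%lx\n", PD_VDO_UFP_DEVCAP(vdo));	/* 0xa */
	return 0;
}
```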

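Finally, note the small from_work() helper in the workqueue.h hunk above: it is container_of() specialised for work callbacks, so a handler can recover its enclosing structure without naming the type twice (it relies on typeof, as the kernel does). A standalone sketch, in which struct work_struct is reduced to a single callback field and my_device/reset_handler are hypothetical driver names:

```c
/*
 * Sketch of the from_work() helper added above. struct work_struct and
 * container_of() are minimal stand-ins; from_work() matches the macro
 * introduced in the workqueue.h hunk.
 */
#include <stdio.h>
#include <stddef.h>

struct work_struct {
	void (*func)(struct work_struct *work);
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define from_work(var, callback_work, work_fieldname) \
	container_of(callback_work, typeof(*var), work_fieldname)

struct my_device {
	const char *name;
	struct work_struct reset_work;
};

static void reset_handler(struct work_struct *work)
{
	/* typeof(*dev) is deduced from the variable being assigned */
	struct my_device *dev = from_work(dev, work, reset_work);

	printf("resetting %s\n", dev->name);
}

int main(void)
{
	struct my_device dev = {
		.name = "demo0",
		.reset_work = { reset_handler },
	};

	dev.reset_work.func(&dev.reset_work);	/* what the workqueue would do */
	return 0;
}
```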